aboutsummaryrefslogtreecommitdiff
path: root/llvm/test/CodeGen/X86/trunc-srl-load.ll
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/CodeGen/X86/trunc-srl-load.ll')
-rw-r--r--llvm/test/CodeGen/X86/trunc-srl-load.ll1652
1 files changed, 108 insertions, 1544 deletions
diff --git a/llvm/test/CodeGen/X86/trunc-srl-load.ll b/llvm/test/CodeGen/X86/trunc-srl-load.ll
index 4dae143..d9c21d3 100644
--- a/llvm/test/CodeGen/X86/trunc-srl-load.ll
+++ b/llvm/test/CodeGen/X86/trunc-srl-load.ll
@@ -1,9 +1,9 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s --check-prefixes=X86
-; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=x86-64 | FileCheck %s --check-prefixes=X64,SSE
-; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=X64,SSE
-; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=X64,AVX,AVX2
-; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=X64,AVX,AVX512
+; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=x86-64 | FileCheck %s --check-prefixes=X64
+; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=X64
+; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=X64
+; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=X64
; Tests showing for the analysis of non-constant shift amounts to improve load address math
@@ -12,42 +12,20 @@
define i16 @extractSub64_16(ptr %word, i32 %idx) nounwind {
; X86-LABEL: extractSub64_16:
; X86: # %bb.0:
-; X86-NEXT: pushl %esi
-; X86-NEXT: movb {{[0-9]+}}(%esp), %ch
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl (%eax), %edx
-; X86-NEXT: movl 4(%eax), %esi
-; X86-NEXT: movb %ch, %cl
-; X86-NEXT: andb $16, %cl
-; X86-NEXT: movl %esi, %eax
-; X86-NEXT: shrl %cl, %eax
-; X86-NEXT: shrdl %cl, %esi, %edx
-; X86-NEXT: testb $32, %ch
-; X86-NEXT: jne .LBB0_2
-; X86-NEXT: # %bb.1:
-; X86-NEXT: movl %edx, %eax
-; X86-NEXT: .LBB0_2:
-; X86-NEXT: # kill: def $ax killed $ax killed $eax
-; X86-NEXT: popl %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: andl $48, %ecx
+; X86-NEXT: shrl $3, %ecx
+; X86-NEXT: movzwl (%eax,%ecx), %eax
; X86-NEXT: retl
;
-; SSE-LABEL: extractSub64_16:
-; SSE: # %bb.0:
-; SSE-NEXT: movl %esi, %ecx
-; SSE-NEXT: movq (%rdi), %rax
-; SSE-NEXT: andb $48, %cl
-; SSE-NEXT: # kill: def $cl killed $cl killed $ecx
-; SSE-NEXT: shrq %cl, %rax
-; SSE-NEXT: # kill: def $ax killed $ax killed $rax
-; SSE-NEXT: retq
-;
-; AVX-LABEL: extractSub64_16:
-; AVX: # %bb.0:
-; AVX-NEXT: # kill: def $esi killed $esi def $rsi
-; AVX-NEXT: andb $48, %sil
-; AVX-NEXT: shrxq %rsi, (%rdi), %rax
-; AVX-NEXT: # kill: def $ax killed $ax killed $rax
-; AVX-NEXT: retq
+; X64-LABEL: extractSub64_16:
+; X64: # %bb.0:
+; X64-NEXT: # kill: def $esi killed $esi def $rsi
+; X64-NEXT: andl $48, %esi
+; X64-NEXT: shrl $3, %esi
+; X64-NEXT: movzwl (%rdi,%rsi), %eax
+; X64-NEXT: retq
%idx_bounds = and i32 %idx, 63
%idx_align = and i32 %idx_bounds, -16
%sh = zext nneg i32 %idx_align to i64
@@ -60,67 +38,20 @@ define i16 @extractSub64_16(ptr %word, i32 %idx) nounwind {
define i16 @extractSub128_16(ptr %word, i32 %idx) nounwind {
; X86-LABEL: extractSub128_16:
; X86: # %bb.0:
-; X86-NEXT: pushl %ebp
-; X86-NEXT: movl %esp, %ebp
-; X86-NEXT: pushl %edi
-; X86-NEXT: pushl %esi
-; X86-NEXT: andl $-16, %esp
-; X86-NEXT: subl $32, %esp
-; X86-NEXT: movzbl 12(%ebp), %eax
-; X86-NEXT: movl 8(%ebp), %ecx
-; X86-NEXT: movl (%ecx), %edx
-; X86-NEXT: movl 4(%ecx), %esi
-; X86-NEXT: movl 8(%ecx), %edi
-; X86-NEXT: movl 12(%ecx), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %edx, (%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: andb $16, %cl
-; X86-NEXT: shrb $3, %al
-; X86-NEXT: andb $12, %al
-; X86-NEXT: movzbl %al, %edx
-; X86-NEXT: movl (%esp,%edx), %eax
-; X86-NEXT: movl 4(%esp,%edx), %edx
-; X86-NEXT: shrdl %cl, %edx, %eax
-; X86-NEXT: # kill: def $ax killed $ax killed $eax
-; X86-NEXT: leal -8(%ebp), %esp
-; X86-NEXT: popl %esi
-; X86-NEXT: popl %edi
-; X86-NEXT: popl %ebp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: andl $112, %ecx
+; X86-NEXT: shrl $3, %ecx
+; X86-NEXT: movzwl (%eax,%ecx), %eax
; X86-NEXT: retl
;
-; SSE-LABEL: extractSub128_16:
-; SSE: # %bb.0:
-; SSE-NEXT: movq (%rdi), %rax
-; SSE-NEXT: movq 8(%rdi), %rdx
-; SSE-NEXT: movl %esi, %ecx
-; SSE-NEXT: andb $48, %cl
-; SSE-NEXT: movq %rdx, %rdi
-; SSE-NEXT: shrq %cl, %rdi
-; SSE-NEXT: shrdq %cl, %rdx, %rax
-; SSE-NEXT: testb $64, %sil
-; SSE-NEXT: cmovneq %rdi, %rax
-; SSE-NEXT: # kill: def $ax killed $ax killed $rax
-; SSE-NEXT: retq
-;
-; AVX-LABEL: extractSub128_16:
-; AVX: # %bb.0:
-; AVX-NEXT: movq (%rdi), %rdx
-; AVX-NEXT: movq 8(%rdi), %rax
-; AVX-NEXT: movl %esi, %ecx
-; AVX-NEXT: andb $48, %cl
-; AVX-NEXT: shrdq %cl, %rax, %rdx
-; AVX-NEXT: shrxq %rcx, %rax, %rax
-; AVX-NEXT: testb $64, %sil
-; AVX-NEXT: cmoveq %rdx, %rax
-; AVX-NEXT: # kill: def $ax killed $ax killed $rax
-; AVX-NEXT: retq
+; X64-LABEL: extractSub128_16:
+; X64: # %bb.0:
+; X64-NEXT: # kill: def $esi killed $esi def $rsi
+; X64-NEXT: andl $112, %esi
+; X64-NEXT: shrl $3, %esi
+; X64-NEXT: movzwl (%rdi,%rsi), %eax
+; X64-NEXT: retq
%idx_bounds = and i32 %idx, 127
%idx_align = and i32 %idx_bounds, -16
%sh = zext nneg i32 %idx_align to i128
@@ -133,62 +64,20 @@ define i16 @extractSub128_16(ptr %word, i32 %idx) nounwind {
define i32 @extractSub128_32(ptr %word, i32 %idx) nounwind {
; X86-LABEL: extractSub128_32:
; X86: # %bb.0:
-; X86-NEXT: pushl %ebp
-; X86-NEXT: movl %esp, %ebp
-; X86-NEXT: pushl %edi
-; X86-NEXT: pushl %esi
-; X86-NEXT: andl $-16, %esp
-; X86-NEXT: subl $32, %esp
-; X86-NEXT: movzbl 12(%ebp), %eax
-; X86-NEXT: movl 8(%ebp), %ecx
-; X86-NEXT: movl (%ecx), %edx
-; X86-NEXT: movl 4(%ecx), %esi
-; X86-NEXT: movl 8(%ecx), %edi
-; X86-NEXT: movl 12(%ecx), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %edx, (%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: andb $96, %al
-; X86-NEXT: shrb $3, %al
-; X86-NEXT: movzbl %al, %eax
-; X86-NEXT: movl (%esp,%eax), %eax
-; X86-NEXT: leal -8(%ebp), %esp
-; X86-NEXT: popl %esi
-; X86-NEXT: popl %edi
-; X86-NEXT: popl %ebp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: andl $96, %ecx
+; X86-NEXT: shrl $3, %ecx
+; X86-NEXT: movl (%eax,%ecx), %eax
; X86-NEXT: retl
;
-; SSE-LABEL: extractSub128_32:
-; SSE: # %bb.0:
-; SSE-NEXT: movq (%rdi), %rax
-; SSE-NEXT: movq 8(%rdi), %rdx
-; SSE-NEXT: movl %esi, %ecx
-; SSE-NEXT: andb $32, %cl
-; SSE-NEXT: movq %rdx, %rdi
-; SSE-NEXT: shrq %cl, %rdi
-; SSE-NEXT: shrdq %cl, %rdx, %rax
-; SSE-NEXT: testb $64, %sil
-; SSE-NEXT: cmovneq %rdi, %rax
-; SSE-NEXT: # kill: def $eax killed $eax killed $rax
-; SSE-NEXT: retq
-;
-; AVX-LABEL: extractSub128_32:
-; AVX: # %bb.0:
-; AVX-NEXT: movq (%rdi), %rdx
-; AVX-NEXT: movq 8(%rdi), %rax
-; AVX-NEXT: movl %esi, %ecx
-; AVX-NEXT: andb $32, %cl
-; AVX-NEXT: shrdq %cl, %rax, %rdx
-; AVX-NEXT: shrxq %rcx, %rax, %rax
-; AVX-NEXT: testb $64, %sil
-; AVX-NEXT: cmoveq %rdx, %rax
-; AVX-NEXT: # kill: def $eax killed $eax killed $rax
-; AVX-NEXT: retq
+; X64-LABEL: extractSub128_32:
+; X64: # %bb.0:
+; X64-NEXT: # kill: def $esi killed $esi def $rsi
+; X64-NEXT: andl $96, %esi
+; X64-NEXT: shrl $3, %esi
+; X64-NEXT: movl (%rdi,%rsi), %eax
+; X64-NEXT: retq
%idx_bounds = and i32 %idx, 127
%idx_align = and i32 %idx_bounds, -32
%sh = zext nneg i32 %idx_align to i128
@@ -201,46 +90,20 @@ define i32 @extractSub128_32(ptr %word, i32 %idx) nounwind {
define i64 @extractSub128_64(ptr %word, i32 %idx) nounwind {
; X86-LABEL: extractSub128_64:
; X86: # %bb.0:
-; X86-NEXT: pushl %ebp
-; X86-NEXT: movl %esp, %ebp
-; X86-NEXT: pushl %edi
-; X86-NEXT: pushl %esi
-; X86-NEXT: andl $-16, %esp
-; X86-NEXT: subl $32, %esp
-; X86-NEXT: movzbl 12(%ebp), %eax
-; X86-NEXT: movl 8(%ebp), %ecx
-; X86-NEXT: movl (%ecx), %edx
-; X86-NEXT: movl 4(%ecx), %esi
-; X86-NEXT: movl 8(%ecx), %edi
-; X86-NEXT: movl 12(%ecx), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %edx, (%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: andb $64, %al
-; X86-NEXT: shrb $3, %al
-; X86-NEXT: movzbl %al, %ecx
-; X86-NEXT: movl (%esp,%ecx), %eax
-; X86-NEXT: movl 4(%esp,%ecx), %edx
-; X86-NEXT: leal -8(%ebp), %esp
-; X86-NEXT: popl %esi
-; X86-NEXT: popl %edi
-; X86-NEXT: popl %ebp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: andl $64, %edx
+; X86-NEXT: shrl $3, %edx
+; X86-NEXT: movl (%ecx,%edx), %eax
+; X86-NEXT: movl 4(%ecx,%edx), %edx
; X86-NEXT: retl
;
; X64-LABEL: extractSub128_64:
; X64: # %bb.0:
-; X64-NEXT: testb $64, %sil
-; X64-NEXT: je .LBB3_1
-; X64-NEXT: # %bb.2:
-; X64-NEXT: movq 8(%rdi), %rax
-; X64-NEXT: retq
-; X64-NEXT: .LBB3_1:
-; X64-NEXT: movq (%rdi), %rax
+; X64-NEXT: # kill: def $esi killed $esi def $rsi
+; X64-NEXT: andl $64, %esi
+; X64-NEXT: shrl $3, %esi
+; X64-NEXT: movq (%rdi,%rsi), %rax
; X64-NEXT: retq
%idx_bounds = and i32 %idx, 127
%idx_align = and i32 %idx_bounds, -64
@@ -254,185 +117,20 @@ define i64 @extractSub128_64(ptr %word, i32 %idx) nounwind {
define i8 @extractSub512_8(ptr %word, i32 %idx) nounwind {
; X86-LABEL: extractSub512_8:
; X86: # %bb.0:
-; X86-NEXT: pushl %ebp
-; X86-NEXT: movl %esp, %ebp
-; X86-NEXT: pushl %ebx
-; X86-NEXT: pushl %edi
-; X86-NEXT: pushl %esi
-; X86-NEXT: andl $-16, %esp
-; X86-NEXT: subl $192, %esp
-; X86-NEXT: movl 8(%ebp), %eax
-; X86-NEXT: movl (%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 4(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 8(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 12(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 16(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 20(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 24(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 28(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 32(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 36(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 40(%eax), %ebx
-; X86-NEXT: movl 44(%eax), %edi
-; X86-NEXT: movl 48(%eax), %esi
-; X86-NEXT: movl 52(%eax), %edx
-; X86-NEXT: movl 56(%eax), %ecx
-; X86-NEXT: movl 60(%eax), %eax
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl 12(%ebp), %edx
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %edx, %ecx
-; X86-NEXT: andl $24, %ecx
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: shrl $3, %edx
-; X86-NEXT: andl $60, %edx
-; X86-NEXT: movl 48(%esp,%edx), %eax
-; X86-NEXT: movl 52(%esp,%edx), %edx
-; X86-NEXT: # kill: def $cl killed $cl killed $ecx
-; X86-NEXT: shrdl %cl, %edx, %eax
-; X86-NEXT: # kill: def $al killed $al killed $eax
-; X86-NEXT: leal -12(%ebp), %esp
-; X86-NEXT: popl %esi
-; X86-NEXT: popl %edi
-; X86-NEXT: popl %ebx
-; X86-NEXT: popl %ebp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: shrl $3, %ecx
+; X86-NEXT: andl $63, %ecx
+; X86-NEXT: movzbl (%eax,%ecx), %eax
; X86-NEXT: retl
;
-; SSE-LABEL: extractSub512_8:
-; SSE: # %bb.0:
-; SSE-NEXT: pushq %rax
-; SSE-NEXT: # kill: def $esi killed $esi def $rsi
-; SSE-NEXT: movups (%rdi), %xmm0
-; SSE-NEXT: movups 16(%rdi), %xmm1
-; SSE-NEXT: movups 32(%rdi), %xmm2
-; SSE-NEXT: movups 48(%rdi), %xmm3
-; SSE-NEXT: xorps %xmm4, %xmm4
-; SSE-NEXT: movaps %xmm4, -{{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm4, -{{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm4, -{{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm4, -{{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm3, -{{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
-; SSE-NEXT: movl %esi, %ecx
-; SSE-NEXT: andl $56, %ecx
-; SSE-NEXT: shrl $3, %esi
-; SSE-NEXT: andl $56, %esi
-; SSE-NEXT: movq -128(%rsp,%rsi), %rdx
-; SSE-NEXT: shrq %cl, %rdx
-; SSE-NEXT: movl -120(%rsp,%rsi), %eax
-; SSE-NEXT: addl %eax, %eax
-; SSE-NEXT: notl %ecx
-; SSE-NEXT: # kill: def $cl killed $cl killed $ecx
-; SSE-NEXT: shlq %cl, %rax
-; SSE-NEXT: orl %edx, %eax
-; SSE-NEXT: # kill: def $al killed $al killed $rax
-; SSE-NEXT: popq %rcx
-; SSE-NEXT: retq
-;
-; AVX2-LABEL: extractSub512_8:
-; AVX2: # %bb.0:
-; AVX2-NEXT: pushq %rax
-; AVX2-NEXT: # kill: def $esi killed $esi def $rsi
-; AVX2-NEXT: vmovups (%rdi), %ymm0
-; AVX2-NEXT: vmovups 32(%rdi), %ymm1
-; AVX2-NEXT: vxorps %xmm2, %xmm2, %xmm2
-; AVX2-NEXT: vmovups %ymm2, -{{[0-9]+}}(%rsp)
-; AVX2-NEXT: vmovups %ymm2, -{{[0-9]+}}(%rsp)
-; AVX2-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp)
-; AVX2-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
-; AVX2-NEXT: movl %esi, %ecx
-; AVX2-NEXT: andl $56, %ecx
-; AVX2-NEXT: shrl $3, %esi
-; AVX2-NEXT: andl $56, %esi
-; AVX2-NEXT: shrxq %rcx, -128(%rsp,%rsi), %rax
-; AVX2-NEXT: # kill: def $ecx killed $ecx killed $rcx def $rcx
-; AVX2-NEXT: notl %ecx
-; AVX2-NEXT: movl -120(%rsp,%rsi), %edx
-; AVX2-NEXT: addl %edx, %edx
-; AVX2-NEXT: shlxq %rcx, %rdx, %rcx
-; AVX2-NEXT: orl %ecx, %eax
-; AVX2-NEXT: # kill: def $al killed $al killed $rax
-; AVX2-NEXT: popq %rcx
-; AVX2-NEXT: vzeroupper
-; AVX2-NEXT: retq
-;
-; AVX512-LABEL: extractSub512_8:
-; AVX512: # %bb.0:
-; AVX512-NEXT: pushq %rax
-; AVX512-NEXT: vmovups (%rdi), %ymm0
-; AVX512-NEXT: vmovups 32(%rdi), %ymm1
-; AVX512-NEXT: vxorps %xmm2, %xmm2, %xmm2
-; AVX512-NEXT: vmovups %ymm2, -{{[0-9]+}}(%rsp)
-; AVX512-NEXT: vmovups %ymm2, -{{[0-9]+}}(%rsp)
-; AVX512-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp)
-; AVX512-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
-; AVX512-NEXT: # kill: def $esi killed $esi def $rsi
-; AVX512-NEXT: movl %esi, %ecx
-; AVX512-NEXT: andl $56, %ecx
-; AVX512-NEXT: shrl $3, %esi
-; AVX512-NEXT: andl $56, %esi
-; AVX512-NEXT: shrxq %rcx, -128(%rsp,%rsi), %rax
-; AVX512-NEXT: # kill: def $ecx killed $ecx killed $rcx def $rcx
-; AVX512-NEXT: notl %ecx
-; AVX512-NEXT: movl -120(%rsp,%rsi), %edx
-; AVX512-NEXT: addl %edx, %edx
-; AVX512-NEXT: shlxq %rcx, %rdx, %rcx
-; AVX512-NEXT: orl %ecx, %eax
-; AVX512-NEXT: # kill: def $al killed $al killed $rax
-; AVX512-NEXT: popq %rcx
-; AVX512-NEXT: vzeroupper
-; AVX512-NEXT: retq
+; X64-LABEL: extractSub512_8:
+; X64: # %bb.0:
+; X64-NEXT: # kill: def $esi killed $esi def $rsi
+; X64-NEXT: shrl $3, %esi
+; X64-NEXT: andl $63, %esi
+; X64-NEXT: movzbl (%rdi,%rsi), %eax
+; X64-NEXT: retq
%idx_bounds = and i32 %idx, 511
%idx_align = and i32 %idx_bounds, -8
%ld = load i512, ptr %word, align 8
@@ -445,152 +143,21 @@ define i8 @extractSub512_8(ptr %word, i32 %idx) nounwind {
define i64 @extractSub512_64(ptr %word, i32 %idx) nounwind {
; X86-LABEL: extractSub512_64:
; X86: # %bb.0:
-; X86-NEXT: pushl %ebp
-; X86-NEXT: movl %esp, %ebp
-; X86-NEXT: pushl %ebx
-; X86-NEXT: pushl %edi
-; X86-NEXT: pushl %esi
-; X86-NEXT: andl $-16, %esp
-; X86-NEXT: subl $192, %esp
-; X86-NEXT: movl 8(%ebp), %eax
-; X86-NEXT: movl (%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 4(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 8(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 12(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 16(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 20(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 24(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 28(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 32(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 36(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 40(%eax), %ebx
-; X86-NEXT: movl 44(%eax), %edi
-; X86-NEXT: movl 48(%eax), %esi
-; X86-NEXT: movl 52(%eax), %edx
-; X86-NEXT: movl 56(%eax), %ecx
-; X86-NEXT: movl 60(%eax), %eax
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: shrl $3, %ecx
-; X86-NEXT: andl $56, %ecx
-; X86-NEXT: movl 48(%esp,%ecx), %eax
-; X86-NEXT: movl 52(%esp,%ecx), %edx
-; X86-NEXT: leal -12(%ebp), %esp
-; X86-NEXT: popl %esi
-; X86-NEXT: popl %edi
-; X86-NEXT: popl %ebx
-; X86-NEXT: popl %ebp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: shrl $3, %edx
+; X86-NEXT: andl $56, %edx
+; X86-NEXT: movl (%ecx,%edx), %eax
+; X86-NEXT: movl 4(%ecx,%edx), %edx
; X86-NEXT: retl
;
-; SSE-LABEL: extractSub512_64:
-; SSE: # %bb.0:
-; SSE-NEXT: pushq %rax
-; SSE-NEXT: # kill: def $esi killed $esi def $rsi
-; SSE-NEXT: movups (%rdi), %xmm0
-; SSE-NEXT: movups 16(%rdi), %xmm1
-; SSE-NEXT: movups 32(%rdi), %xmm2
-; SSE-NEXT: movups 48(%rdi), %xmm3
-; SSE-NEXT: xorps %xmm4, %xmm4
-; SSE-NEXT: movaps %xmm4, -{{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm4, -{{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm4, -{{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm4, -{{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm3, -{{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
-; SSE-NEXT: shrl $3, %esi
-; SSE-NEXT: andl $56, %esi
-; SSE-NEXT: movq -128(%rsp,%rsi), %rax
-; SSE-NEXT: popq %rcx
-; SSE-NEXT: retq
-;
-; AVX2-LABEL: extractSub512_64:
-; AVX2: # %bb.0:
-; AVX2-NEXT: pushq %rax
-; AVX2-NEXT: vmovups (%rdi), %ymm0
-; AVX2-NEXT: vmovups 32(%rdi), %ymm1
-; AVX2-NEXT: vxorps %xmm2, %xmm2, %xmm2
-; AVX2-NEXT: vmovups %ymm2, -{{[0-9]+}}(%rsp)
-; AVX2-NEXT: vmovups %ymm2, -{{[0-9]+}}(%rsp)
-; AVX2-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp)
-; AVX2-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
-; AVX2-NEXT: # kill: def $esi killed $esi def $rsi
-; AVX2-NEXT: shrl $3, %esi
-; AVX2-NEXT: andl $56, %esi
-; AVX2-NEXT: movq -128(%rsp,%rsi), %rax
-; AVX2-NEXT: popq %rcx
-; AVX2-NEXT: vzeroupper
-; AVX2-NEXT: retq
-;
-; AVX512-LABEL: extractSub512_64:
-; AVX512: # %bb.0:
-; AVX512-NEXT: pushq %rax
-; AVX512-NEXT: vmovups (%rdi), %ymm0
-; AVX512-NEXT: vmovups 32(%rdi), %ymm1
-; AVX512-NEXT: vxorps %xmm2, %xmm2, %xmm2
-; AVX512-NEXT: vmovups %ymm2, -{{[0-9]+}}(%rsp)
-; AVX512-NEXT: vmovups %ymm2, -{{[0-9]+}}(%rsp)
-; AVX512-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp)
-; AVX512-NEXT: # kill: def $esi killed $esi def $rsi
-; AVX512-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
-; AVX512-NEXT: shrl $3, %esi
-; AVX512-NEXT: andl $56, %esi
-; AVX512-NEXT: movq -128(%rsp,%rsi), %rax
-; AVX512-NEXT: popq %rcx
-; AVX512-NEXT: vzeroupper
-; AVX512-NEXT: retq
+; X64-LABEL: extractSub512_64:
+; X64: # %bb.0:
+; X64-NEXT: # kill: def $esi killed $esi def $rsi
+; X64-NEXT: shrl $3, %esi
+; X64-NEXT: andl $56, %esi
+; X64-NEXT: movq (%rdi,%rsi), %rax
+; X64-NEXT: retq
%idx_bounds = and i32 %idx, 511
%idx_align = and i32 %idx_bounds, -64
%sh = zext nneg i32 %idx_align to i512
@@ -603,143 +170,35 @@ define i64 @extractSub512_64(ptr %word, i32 %idx) nounwind {
define i128 @extractSub512_128(ptr %word, i32 %idx) nounwind {
; X86-LABEL: extractSub512_128:
; X86: # %bb.0:
-; X86-NEXT: pushl %ebp
-; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: andl $-16, %esp
-; X86-NEXT: subl $192, %esp
-; X86-NEXT: movl 12(%ebp), %eax
-; X86-NEXT: movl (%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 4(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 8(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 12(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 16(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 20(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 24(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 28(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 32(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 36(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 40(%eax), %ebx
-; X86-NEXT: movl 44(%eax), %edi
-; X86-NEXT: movl 48(%eax), %esi
-; X86-NEXT: movl 52(%eax), %edx
-; X86-NEXT: movl 56(%eax), %ecx
-; X86-NEXT: movl 60(%eax), %eax
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl 8(%ebp), %eax
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl 16(%ebp), %edi
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: shrl $3, %edi
-; X86-NEXT: andl $48, %edi
-; X86-NEXT: movl 48(%esp,%edi), %ecx
-; X86-NEXT: movl 52(%esp,%edi), %edx
-; X86-NEXT: movl 56(%esp,%edi), %esi
-; X86-NEXT: movl 60(%esp,%edi), %edi
-; X86-NEXT: movl %edi, 12(%eax)
-; X86-NEXT: movl %esi, 8(%eax)
-; X86-NEXT: movl %edx, 4(%eax)
-; X86-NEXT: movl %ecx, (%eax)
-; X86-NEXT: leal -12(%ebp), %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: shrl $3, %edx
+; X86-NEXT: andl $48, %edx
+; X86-NEXT: movl (%ecx,%edx), %esi
+; X86-NEXT: movl 4(%ecx,%edx), %edi
+; X86-NEXT: movl 8(%ecx,%edx), %ebx
+; X86-NEXT: movl 12(%ecx,%edx), %ecx
+; X86-NEXT: movl %ecx, 12(%eax)
+; X86-NEXT: movl %ebx, 8(%eax)
+; X86-NEXT: movl %edi, 4(%eax)
+; X86-NEXT: movl %esi, (%eax)
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
-; X86-NEXT: popl %ebp
; X86-NEXT: retl $4
;
-; SSE-LABEL: extractSub512_128:
-; SSE: # %bb.0:
-; SSE-NEXT: pushq %rax
-; SSE-NEXT: # kill: def $esi killed $esi def $rsi
-; SSE-NEXT: movups (%rdi), %xmm0
-; SSE-NEXT: movups 16(%rdi), %xmm1
-; SSE-NEXT: movups 32(%rdi), %xmm2
-; SSE-NEXT: movups 48(%rdi), %xmm3
-; SSE-NEXT: xorps %xmm4, %xmm4
-; SSE-NEXT: movaps %xmm4, -{{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm4, -{{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm4, -{{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm4, -{{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm3, -{{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
-; SSE-NEXT: shrl $3, %esi
-; SSE-NEXT: andl $48, %esi
-; SSE-NEXT: movq -128(%rsp,%rsi), %rax
-; SSE-NEXT: movq -120(%rsp,%rsi), %rdx
-; SSE-NEXT: popq %rcx
-; SSE-NEXT: retq
-;
-; AVX-LABEL: extractSub512_128:
-; AVX: # %bb.0:
-; AVX-NEXT: pushq %rax
-; AVX-NEXT: # kill: def $esi killed $esi def $rsi
-; AVX-NEXT: vmovups (%rdi), %ymm0
-; AVX-NEXT: vmovups 32(%rdi), %ymm1
-; AVX-NEXT: vxorps %xmm2, %xmm2, %xmm2
-; AVX-NEXT: vmovups %ymm2, -{{[0-9]+}}(%rsp)
-; AVX-NEXT: vmovups %ymm2, -{{[0-9]+}}(%rsp)
-; AVX-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp)
-; AVX-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
-; AVX-NEXT: shrl $3, %esi
-; AVX-NEXT: andl $48, %esi
-; AVX-NEXT: movq -128(%rsp,%rsi), %rax
-; AVX-NEXT: movq -120(%rsp,%rsi), %rdx
-; AVX-NEXT: popq %rcx
-; AVX-NEXT: vzeroupper
-; AVX-NEXT: retq
+; X64-LABEL: extractSub512_128:
+; X64: # %bb.0:
+; X64-NEXT: # kill: def $esi killed $esi def $rsi
+; X64-NEXT: shrl $3, %esi
+; X64-NEXT: andl $48, %esi
+; X64-NEXT: movq (%rdi,%rsi), %rax
+; X64-NEXT: movq 8(%rdi,%rsi), %rdx
+; X64-NEXT: retq
%idx_bounds = and i32 %idx, 511
%idx_align = and i32 %idx_bounds, -128
%sh = zext nneg i32 %idx_align to i512
@@ -752,916 +211,21 @@ define i128 @extractSub512_128(ptr %word, i32 %idx) nounwind {
define i64 @extractSub4096_64(ptr %word, i32 %idx) nounwind {
; X86-LABEL: extractSub4096_64:
; X86: # %bb.0:
-; X86-NEXT: pushl %ebp
-; X86-NEXT: movl %esp, %ebp
-; X86-NEXT: pushl %ebx
-; X86-NEXT: pushl %edi
-; X86-NEXT: pushl %esi
-; X86-NEXT: andl $-16, %esp
-; X86-NEXT: subl $1536, %esp # imm = 0x600
-; X86-NEXT: movl 8(%ebp), %eax
-; X86-NEXT: movl 4(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 8(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 12(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 16(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 20(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 24(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 28(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 32(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 36(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 40(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 44(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 48(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 52(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 56(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 60(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 64(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 68(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 72(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 76(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 80(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 84(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 88(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 92(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 96(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 100(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 104(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 108(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 112(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 116(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 120(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 124(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 128(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 132(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 136(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 140(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 144(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 148(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 152(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 156(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 160(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 164(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 168(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 172(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 176(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 180(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 184(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 188(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 192(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 196(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 200(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 204(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 208(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 212(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 216(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 220(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 224(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 228(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 232(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 236(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 240(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 244(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 248(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 252(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 256(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 260(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 264(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 268(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 272(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 276(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 280(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 284(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 288(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 292(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 296(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 300(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 304(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 308(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 312(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 316(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 320(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 324(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 328(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 332(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 336(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 340(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 344(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 348(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 352(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 356(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 360(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 364(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 368(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 372(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 376(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 380(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl (%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 384(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 388(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 392(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 396(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 400(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 404(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 408(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 412(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 416(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 420(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 424(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 428(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 432(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 436(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 440(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 444(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 448(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 452(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 456(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 460(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 464(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 468(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 472(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 476(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 480(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 484(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 488(%eax), %ebx
-; X86-NEXT: movl 492(%eax), %edi
-; X86-NEXT: movl 496(%eax), %esi
-; X86-NEXT: movl 500(%eax), %edx
-; X86-NEXT: movl 504(%eax), %ecx
-; X86-NEXT: movl 508(%eax), %eax
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $4032, %ecx # imm = 0xFC0
-; X86-NEXT: andl 12(%ebp), %ecx
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: shrl $3, %ecx
-; X86-NEXT: movl 496(%esp,%ecx), %eax
-; X86-NEXT: movl 500(%esp,%ecx), %edx
-; X86-NEXT: leal -12(%ebp), %esp
-; X86-NEXT: popl %esi
-; X86-NEXT: popl %edi
-; X86-NEXT: popl %ebx
-; X86-NEXT: popl %ebp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl $4032, %edx # imm = 0xFC0
+; X86-NEXT: andl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: shrl $3, %edx
+; X86-NEXT: movl (%ecx,%edx), %eax
+; X86-NEXT: movl 4(%ecx,%edx), %edx
; X86-NEXT: retl
;
-; SSE-LABEL: extractSub4096_64:
-; SSE: # %bb.0:
-; SSE-NEXT: subq $1176, %rsp # imm = 0x498
-; SSE-NEXT: # kill: def $esi killed $esi def $rsi
-; SSE-NEXT: movups (%rdi), %xmm0
-; SSE-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; SSE-NEXT: movups 16(%rdi), %xmm0
-; SSE-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; SSE-NEXT: movups 32(%rdi), %xmm0
-; SSE-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; SSE-NEXT: movups 48(%rdi), %xmm0
-; SSE-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; SSE-NEXT: movups 64(%rdi), %xmm0
-; SSE-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; SSE-NEXT: movups 80(%rdi), %xmm0
-; SSE-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; SSE-NEXT: movups 96(%rdi), %xmm0
-; SSE-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; SSE-NEXT: movups 112(%rdi), %xmm0
-; SSE-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; SSE-NEXT: movups 128(%rdi), %xmm0
-; SSE-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
-; SSE-NEXT: movups 144(%rdi), %xmm0
-; SSE-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; SSE-NEXT: movups 160(%rdi), %xmm0
-; SSE-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; SSE-NEXT: movups 176(%rdi), %xmm0
-; SSE-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; SSE-NEXT: movups 192(%rdi), %xmm0
-; SSE-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; SSE-NEXT: movups 208(%rdi), %xmm0
-; SSE-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; SSE-NEXT: movups 224(%rdi), %xmm0
-; SSE-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; SSE-NEXT: movups 240(%rdi), %xmm0
-; SSE-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; SSE-NEXT: movups 256(%rdi), %xmm0
-; SSE-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; SSE-NEXT: movups 272(%rdi), %xmm15
-; SSE-NEXT: movups 288(%rdi), %xmm14
-; SSE-NEXT: movups 304(%rdi), %xmm13
-; SSE-NEXT: movups 320(%rdi), %xmm12
-; SSE-NEXT: movups 336(%rdi), %xmm11
-; SSE-NEXT: movups 352(%rdi), %xmm10
-; SSE-NEXT: movups 368(%rdi), %xmm9
-; SSE-NEXT: movups 384(%rdi), %xmm8
-; SSE-NEXT: movups 400(%rdi), %xmm7
-; SSE-NEXT: movups 416(%rdi), %xmm6
-; SSE-NEXT: movups 432(%rdi), %xmm5
-; SSE-NEXT: movups 448(%rdi), %xmm4
-; SSE-NEXT: movups 464(%rdi), %xmm3
-; SSE-NEXT: movups 480(%rdi), %xmm2
-; SSE-NEXT: movups 496(%rdi), %xmm1
-; SSE-NEXT: xorps %xmm0, %xmm0
-; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm1, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm2, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm3, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm4, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm5, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm6, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm7, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm8, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm9, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm10, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm11, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm12, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm13, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm14, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm15, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
-; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
-; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
-; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
-; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
-; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
-; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
-; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
-; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
-; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
-; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
-; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
-; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
-; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
-; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
-; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
-; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
-; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; SSE-NEXT: andl $4032, %esi # imm = 0xFC0
-; SSE-NEXT: shrl $3, %esi
-; SSE-NEXT: movq 144(%rsp,%rsi), %rax
-; SSE-NEXT: addq $1176, %rsp # imm = 0x498
-; SSE-NEXT: retq
-;
-; AVX2-LABEL: extractSub4096_64:
-; AVX2: # %bb.0:
-; AVX2-NEXT: subq $936, %rsp # imm = 0x3A8
-; AVX2-NEXT: vmovups (%rdi), %ymm0
-; AVX2-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
-; AVX2-NEXT: vmovups 32(%rdi), %ymm1
-; AVX2-NEXT: vmovups 64(%rdi), %ymm2
-; AVX2-NEXT: vmovups 96(%rdi), %ymm3
-; AVX2-NEXT: vmovups 128(%rdi), %ymm4
-; AVX2-NEXT: vmovups 160(%rdi), %ymm5
-; AVX2-NEXT: vmovups 192(%rdi), %ymm6
-; AVX2-NEXT: vmovups 224(%rdi), %ymm7
-; AVX2-NEXT: vmovups 256(%rdi), %ymm8
-; AVX2-NEXT: vmovups 288(%rdi), %ymm9
-; AVX2-NEXT: vmovups 320(%rdi), %ymm10
-; AVX2-NEXT: vmovups 352(%rdi), %ymm11
-; AVX2-NEXT: vmovups 384(%rdi), %ymm12
-; AVX2-NEXT: vmovups 416(%rdi), %ymm13
-; AVX2-NEXT: vmovups 448(%rdi), %ymm14
-; AVX2-NEXT: vmovups 480(%rdi), %ymm15
-; AVX2-NEXT: vxorps %xmm0, %xmm0, %xmm0
-; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
-; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
-; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
-; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
-; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
-; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
-; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
-; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
-; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
-; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
-; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
-; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
-; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
-; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
-; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
-; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
-; AVX2-NEXT: vmovups %ymm15, {{[0-9]+}}(%rsp)
-; AVX2-NEXT: vmovups %ymm14, {{[0-9]+}}(%rsp)
-; AVX2-NEXT: vmovups %ymm13, {{[0-9]+}}(%rsp)
-; AVX2-NEXT: vmovups %ymm12, {{[0-9]+}}(%rsp)
-; AVX2-NEXT: vmovups %ymm11, {{[0-9]+}}(%rsp)
-; AVX2-NEXT: vmovups %ymm10, {{[0-9]+}}(%rsp)
-; AVX2-NEXT: vmovups %ymm9, {{[0-9]+}}(%rsp)
-; AVX2-NEXT: vmovups %ymm8, {{[0-9]+}}(%rsp)
-; AVX2-NEXT: vmovups %ymm7, {{[0-9]+}}(%rsp)
-; AVX2-NEXT: vmovups %ymm6, {{[0-9]+}}(%rsp)
-; AVX2-NEXT: vmovups %ymm5, {{[0-9]+}}(%rsp)
-; AVX2-NEXT: vmovups %ymm4, {{[0-9]+}}(%rsp)
-; AVX2-NEXT: vmovups %ymm3, (%rsp)
-; AVX2-NEXT: vmovups %ymm2, -{{[0-9]+}}(%rsp)
-; AVX2-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp)
-; AVX2-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
-; AVX2-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
-; AVX2-NEXT: # kill: def $esi killed $esi def $rsi
-; AVX2-NEXT: andl $4032, %esi # imm = 0xFC0
-; AVX2-NEXT: shrl $3, %esi
-; AVX2-NEXT: movq -96(%rsp,%rsi), %rax
-; AVX2-NEXT: addq $936, %rsp # imm = 0x3A8
-; AVX2-NEXT: vzeroupper
-; AVX2-NEXT: retq
-;
-; AVX512-LABEL: extractSub4096_64:
-; AVX512: # %bb.0:
-; AVX512-NEXT: subq $904, %rsp # imm = 0x388
-; AVX512-NEXT: # kill: def $esi killed $esi def $rsi
-; AVX512-NEXT: vmovups (%rdi), %ymm0
-; AVX512-NEXT: vmovups 32(%rdi), %ymm1
-; AVX512-NEXT: vmovups 64(%rdi), %ymm2
-; AVX512-NEXT: vmovups 96(%rdi), %ymm3
-; AVX512-NEXT: vmovups 128(%rdi), %ymm4
-; AVX512-NEXT: vmovups 160(%rdi), %ymm5
-; AVX512-NEXT: vmovups 192(%rdi), %ymm6
-; AVX512-NEXT: vmovups 224(%rdi), %ymm7
-; AVX512-NEXT: vmovups 256(%rdi), %ymm8
-; AVX512-NEXT: vmovups 288(%rdi), %ymm9
-; AVX512-NEXT: vmovups 320(%rdi), %ymm10
-; AVX512-NEXT: vmovups 352(%rdi), %ymm11
-; AVX512-NEXT: vmovups 384(%rdi), %ymm12
-; AVX512-NEXT: vmovups 416(%rdi), %ymm13
-; AVX512-NEXT: andl $4032, %esi # imm = 0xFC0
-; AVX512-NEXT: vmovups 448(%rdi), %ymm14
-; AVX512-NEXT: vmovups 480(%rdi), %ymm15
-; AVX512-NEXT: vxorps %xmm16, %xmm16, %xmm16
-; AVX512-NEXT: vmovups %ymm16, {{[0-9]+}}(%rsp)
-; AVX512-NEXT: vmovups %ymm16, {{[0-9]+}}(%rsp)
-; AVX512-NEXT: vmovups %ymm16, {{[0-9]+}}(%rsp)
-; AVX512-NEXT: vmovups %ymm16, {{[0-9]+}}(%rsp)
-; AVX512-NEXT: vmovups %ymm16, {{[0-9]+}}(%rsp)
-; AVX512-NEXT: vmovups %ymm16, {{[0-9]+}}(%rsp)
-; AVX512-NEXT: vmovups %ymm16, {{[0-9]+}}(%rsp)
-; AVX512-NEXT: vmovups %ymm16, {{[0-9]+}}(%rsp)
-; AVX512-NEXT: vmovups %ymm16, {{[0-9]+}}(%rsp)
-; AVX512-NEXT: vmovups %ymm16, {{[0-9]+}}(%rsp)
-; AVX512-NEXT: vmovups %ymm16, {{[0-9]+}}(%rsp)
-; AVX512-NEXT: vmovups %ymm16, {{[0-9]+}}(%rsp)
-; AVX512-NEXT: vmovups %ymm16, {{[0-9]+}}(%rsp)
-; AVX512-NEXT: vmovups %ymm16, {{[0-9]+}}(%rsp)
-; AVX512-NEXT: vmovups %ymm16, {{[0-9]+}}(%rsp)
-; AVX512-NEXT: vmovups %ymm16, {{[0-9]+}}(%rsp)
-; AVX512-NEXT: vmovups %ymm15, {{[0-9]+}}(%rsp)
-; AVX512-NEXT: vmovups %ymm14, {{[0-9]+}}(%rsp)
-; AVX512-NEXT: vmovups %ymm13, {{[0-9]+}}(%rsp)
-; AVX512-NEXT: vmovups %ymm12, {{[0-9]+}}(%rsp)
-; AVX512-NEXT: vmovups %ymm11, {{[0-9]+}}(%rsp)
-; AVX512-NEXT: vmovups %ymm10, {{[0-9]+}}(%rsp)
-; AVX512-NEXT: vmovups %ymm9, {{[0-9]+}}(%rsp)
-; AVX512-NEXT: vmovups %ymm8, {{[0-9]+}}(%rsp)
-; AVX512-NEXT: vmovups %ymm7, {{[0-9]+}}(%rsp)
-; AVX512-NEXT: vmovups %ymm6, {{[0-9]+}}(%rsp)
-; AVX512-NEXT: vmovups %ymm5, {{[0-9]+}}(%rsp)
-; AVX512-NEXT: vmovups %ymm4, (%rsp)
-; AVX512-NEXT: vmovups %ymm3, -{{[0-9]+}}(%rsp)
-; AVX512-NEXT: vmovups %ymm2, -{{[0-9]+}}(%rsp)
-; AVX512-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp)
-; AVX512-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
-; AVX512-NEXT: shrl $3, %esi
-; AVX512-NEXT: movq -128(%rsp,%rsi), %rax
-; AVX512-NEXT: addq $904, %rsp # imm = 0x388
-; AVX512-NEXT: vzeroupper
-; AVX512-NEXT: retq
+; X64-LABEL: extractSub4096_64:
+; X64: # %bb.0:
+; X64-NEXT: # kill: def $esi killed $esi def $rsi
+; X64-NEXT: andl $4032, %esi # imm = 0xFC0
+; X64-NEXT: shrl $3, %esi
+; X64-NEXT: movq (%rdi,%rsi), %rax
+; X64-NEXT: retq
%idx_bounds = and i32 %idx, 4095
%idx_align = and i32 %idx_bounds, -64
%sh = zext nneg i32 %idx_align to i4096