Diffstat (limited to 'llvm/test/CodeGen/X86/bittest-big-integer.ll')
-rw-r--r--  llvm/test/CodeGen/X86/bittest-big-integer.ll  159
1 file changed, 151 insertions(+), 8 deletions(-)
diff --git a/llvm/test/CodeGen/X86/bittest-big-integer.ll b/llvm/test/CodeGen/X86/bittest-big-integer.ll
index 9d31c29..9b7569f 100644
--- a/llvm/test/CodeGen/X86/bittest-big-integer.ll
+++ b/llvm/test/CodeGen/X86/bittest-big-integer.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-- | FileCheck %s --check-prefixes=X86
-; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64 | FileCheck %s --check-prefixes=X64,SSE
-; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=X64,SSE
+; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64 | FileCheck %s --check-prefixes=X64,SSE,SSE2
+; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=X64,SSE,SSE4
; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=X64,AVX,AVX2
; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=X64,AVX,AVX512
@@ -956,6 +956,149 @@ define i1 @complement_cmpz_i128(ptr %word, i32 %position) nounwind {
ret i1 %cmp
}
+; Load hidden behind bitcast
+define <8 x i16> @complement_ne_i128_bitcast(ptr %word, i32 %position) nounwind {
+; X86-LABEL: complement_ne_i128_bitcast:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $80, %esp
+; X86-NEXT: movl 12(%ebp), %eax
+; X86-NEXT: movzwl (%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movzwl 12(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movzwl 14(%eax), %edi
+; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: shll $16, %edi
+; X86-NEXT: orl %ecx, %edi
+; X86-NEXT: movzwl 2(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movzwl 4(%eax), %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movzwl 6(%eax), %esi
+; X86-NEXT: movzwl 8(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movzwl 10(%eax), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: shll $16, %eax
+; X86-NEXT: orl %ecx, %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: shll $16, %eax
+; X86-NEXT: orl %edx, %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: shll $16, %eax
+; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl 16(%ebp), %eax
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: andb $96, %bl
+; X86-NEXT: shrb $3, %bl
+; X86-NEXT: movzbl %bl, %edi
+; X86-NEXT: movl 32(%esp,%edi), %edi
+; X86-NEXT: btcl %eax, %edi
+; X86-NEXT: andl $96, %eax
+; X86-NEXT: shrl $3, %eax
+; X86-NEXT: movl 12(%ebp), %ecx
+; X86-NEXT: movl %edi, (%ecx,%eax)
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: movw %dx, 14(%eax)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: movw %dx, 12(%eax)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movw %cx, 10(%eax)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movw %cx, 8(%eax)
+; X86-NEXT: movw %si, 6(%eax)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movw %cx, 4(%eax)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movw %cx, 2(%eax)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movw %cx, (%eax)
+; X86-NEXT: leal -12(%ebp), %esp
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
+; X86-NEXT: retl $4
+;
+; SSE2-LABEL: complement_ne_i128_bitcast:
+; SSE2: # %bb.0:
+; SSE2-NEXT: # kill: def $esi killed $esi def $rsi
+; SSE2-NEXT: movdqa (%rdi), %xmm0
+; SSE2-NEXT: movq 8(%rdi), %rax
+; SSE2-NEXT: movq %xmm0, %rdx
+; SSE2-NEXT: movl %esi, %ecx
+; SSE2-NEXT: andb $32, %cl
+; SSE2-NEXT: shrdq %cl, %rax, %rdx
+; SSE2-NEXT: shrq %cl, %rax
+; SSE2-NEXT: testb $64, %sil
+; SSE2-NEXT: cmoveq %rdx, %rax
+; SSE2-NEXT: btcl %esi, %eax
+; SSE2-NEXT: andl $96, %esi
+; SSE2-NEXT: shrl $3, %esi
+; SSE2-NEXT: movl %eax, (%rdi,%rsi)
+; SSE2-NEXT: retq
+;
+; SSE4-LABEL: complement_ne_i128_bitcast:
+; SSE4: # %bb.0:
+; SSE4-NEXT: # kill: def $esi killed $esi def $rsi
+; SSE4-NEXT: movdqa (%rdi), %xmm0
+; SSE4-NEXT: pextrq $1, %xmm0, %rax
+; SSE4-NEXT: movq %xmm0, %rdx
+; SSE4-NEXT: movl %esi, %ecx
+; SSE4-NEXT: andb $32, %cl
+; SSE4-NEXT: shrdq %cl, %rax, %rdx
+; SSE4-NEXT: shrq %cl, %rax
+; SSE4-NEXT: testb $64, %sil
+; SSE4-NEXT: cmoveq %rdx, %rax
+; SSE4-NEXT: btcl %esi, %eax
+; SSE4-NEXT: andl $96, %esi
+; SSE4-NEXT: shrl $3, %esi
+; SSE4-NEXT: movl %eax, (%rdi,%rsi)
+; SSE4-NEXT: retq
+;
+; AVX-LABEL: complement_ne_i128_bitcast:
+; AVX: # %bb.0:
+; AVX-NEXT: # kill: def $esi killed $esi def $rsi
+; AVX-NEXT: vmovdqa (%rdi), %xmm0
+; AVX-NEXT: vpextrq $1, %xmm0, %rax
+; AVX-NEXT: vmovq %xmm0, %rdx
+; AVX-NEXT: movl %esi, %ecx
+; AVX-NEXT: andb $32, %cl
+; AVX-NEXT: shrdq %cl, %rax, %rdx
+; AVX-NEXT: shrxq %rcx, %rax, %rax
+; AVX-NEXT: testb $64, %sil
+; AVX-NEXT: cmoveq %rdx, %rax
+; AVX-NEXT: btcl %esi, %eax
+; AVX-NEXT: andl $96, %esi
+; AVX-NEXT: shrl $3, %esi
+; AVX-NEXT: movl %eax, (%rdi,%rsi)
+; AVX-NEXT: retq
+ %rem = and i32 %position, 127
+ %ofs = zext nneg i32 %rem to i128
+ %bit = shl nuw i128 1, %ofs
+ %ldv = load <8 x i16>, ptr %word
+ %ld = bitcast <8 x i16> %ldv to i128
+ %test = and i128 %ld, %bit
+ %res = xor i128 %ld, %bit
+ store i128 %res, ptr %word
+ ret <8 x i16> %ldv
+}
+
; Multiple loads in store chain
define i32 @reset_multiload_i128(ptr %word, i32 %position, ptr %p) nounwind {
; X86-LABEL: reset_multiload_i128:
@@ -975,10 +1118,10 @@ define i32 @reset_multiload_i128(ptr %word, i32 %position, ptr %p) nounwind {
; X86-NEXT: btrl %edx, %ebx
; X86-NEXT: btl %edx, %edi
; X86-NEXT: movl %ebx, (%ecx,%esi)
-; X86-NEXT: jae .LBB22_2
+; X86-NEXT: jae .LBB23_2
; X86-NEXT: # %bb.1:
; X86-NEXT: xorl %eax, %eax
-; X86-NEXT: .LBB22_2:
+; X86-NEXT: .LBB23_2:
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
@@ -994,10 +1137,10 @@ define i32 @reset_multiload_i128(ptr %word, i32 %position, ptr %p) nounwind {
; X64-NEXT: btrl %esi, %r8d
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: btl %esi, %r9d
-; X64-NEXT: jb .LBB22_2
+; X64-NEXT: jb .LBB23_2
; X64-NEXT: # %bb.1:
; X64-NEXT: movl (%rdx), %eax
-; X64-NEXT: .LBB22_2:
+; X64-NEXT: .LBB23_2:
; X64-NEXT: movl %r8d, (%rdi,%rcx)
; X64-NEXT: retq
%rem = and i32 %position, 127
@@ -1046,10 +1189,10 @@ define i32 @chain_reset_i256(ptr %p0, ptr %p1, ptr %p2, i32 %position) nounwind
; X86-NEXT: movl %edi, (%edx)
; X86-NEXT: movl (%eax), %eax
; X86-NEXT: orl %ecx, %ebp
-; X86-NEXT: jne .LBB23_2
+; X86-NEXT: jne .LBB24_2
; X86-NEXT: # %bb.1:
; X86-NEXT: addl %esi, %eax
-; X86-NEXT: .LBB23_2:
+; X86-NEXT: .LBB24_2:
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx