diff options
Diffstat (limited to 'llvm/test/CodeGen/X86')
-rw-r--r-- | llvm/test/CodeGen/X86/AMX/amx-tile-basic.ll | 78 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/fshl.ll | 81 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/fshr.ll | 90 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/pr161693.ll | 40 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/sbb.ll | 29 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/shift-i128.ll | 3 |
6 files changed, 173 insertions, 148 deletions
diff --git a/llvm/test/CodeGen/X86/AMX/amx-tile-basic.ll b/llvm/test/CodeGen/X86/AMX/amx-tile-basic.ll index 6ef7219..9cf7aab 100644 --- a/llvm/test/CodeGen/X86/AMX/amx-tile-basic.ll +++ b/llvm/test/CodeGen/X86/AMX/amx-tile-basic.ll @@ -56,14 +56,9 @@ define void @PR90954(ptr %0, ptr %1, i32 %2) nounwind { ; CHECK-LABEL: PR90954: ; CHECK: # %bb.0: ; CHECK-NEXT: pushq %rbp -; CHECK-NEXT: movq %rsp, %rbp -; CHECK-NEXT: pushq %r15 ; CHECK-NEXT: pushq %r14 -; CHECK-NEXT: pushq %r13 -; CHECK-NEXT: pushq %r12 ; CHECK-NEXT: pushq %rbx -; CHECK-NEXT: andq $-1024, %rsp # imm = 0xFC00 -; CHECK-NEXT: subq $5120, %rsp # imm = 0x1400 +; CHECK-NEXT: subq $2912, %rsp # imm = 0xB60 ; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0 ; CHECK-NEXT: vmovups %zmm0, {{[0-9]+}}(%rsp) ; CHECK-NEXT: movb $1, {{[0-9]+}}(%rsp) @@ -79,29 +74,26 @@ define void @PR90954(ptr %0, ptr %1, i32 %2) nounwind { ; CHECK-NEXT: movw $64, %cx ; CHECK-NEXT: movw $16, %di ; CHECK-NEXT: movb $1, %r8b -; CHECK-NEXT: movl $64, %r9d -; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %r10 -; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %r11 -; CHECK-NEXT: xorl %ebx, %ebx -; CHECK-NEXT: xorl %r14d, %r14d +; CHECK-NEXT: xorl %r9d, %r9d +; CHECK-NEXT: xorl %r10d, %r10d ; CHECK-NEXT: jmp .LBB1_1 ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB1_5: # in Loop: Header=BB1_1 Depth=1 -; CHECK-NEXT: incq %r14 -; CHECK-NEXT: addl %edx, %ebx +; CHECK-NEXT: incq %r10 +; CHECK-NEXT: addl %edx, %r9d ; CHECK-NEXT: .LBB1_1: # =>This Loop Header: Depth=1 ; CHECK-NEXT: # Child Loop BB1_2 Depth 2 -; CHECK-NEXT: movslq %ebx, %r15 -; CHECK-NEXT: leaq (%rsi,%r15,4), %r15 -; CHECK-NEXT: xorl %r12d, %r12d -; CHECK-NEXT: xorl %r13d, %r13d +; CHECK-NEXT: movslq %r9d, %r11 +; CHECK-NEXT: leaq (%rsi,%r11,4), %r11 +; CHECK-NEXT: xorl %ebx, %ebx +; CHECK-NEXT: xorl %r14d, %r14d ; CHECK-NEXT: jmp .LBB1_2 ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB1_4: # in Loop: Header=BB1_2 Depth=2 -; CHECK-NEXT: tilestored %tmm1, (%r15,%rax) -; CHECK-NEXT: incq %r13 -; CHECK-NEXT: addq $64, %r15 -; CHECK-NEXT: decq %r12 +; CHECK-NEXT: tilestored %tmm1, (%r11,%rax) +; CHECK-NEXT: incq %r14 +; CHECK-NEXT: addq $64, %r11 +; CHECK-NEXT: decq %rbx ; CHECK-NEXT: je .LBB1_5 ; CHECK-NEXT: .LBB1_2: # Parent Loop BB1_1 Depth=1 ; CHECK-NEXT: # => This Inner Loop Header: Depth=2 @@ -110,46 +102,12 @@ define void @PR90954(ptr %0, ptr %1, i32 %2) nounwind { ; CHECK-NEXT: testb %r8b, %r8b ; CHECK-NEXT: jne .LBB1_4 ; CHECK-NEXT: # %bb.3: # in Loop: Header=BB1_2 Depth=2 -; CHECK-NEXT: vmovaps %zmm0, {{[0-9]+}}(%rsp) -; CHECK-NEXT: vmovaps %zmm0, {{[0-9]+}}(%rsp) -; CHECK-NEXT: vmovaps %zmm0, {{[0-9]+}}(%rsp) -; CHECK-NEXT: vmovaps %zmm0, {{[0-9]+}}(%rsp) -; CHECK-NEXT: vmovaps %zmm0, {{[0-9]+}}(%rsp) -; CHECK-NEXT: vmovaps %zmm0, {{[0-9]+}}(%rsp) -; CHECK-NEXT: vmovaps %zmm0, {{[0-9]+}}(%rsp) -; CHECK-NEXT: vmovaps %zmm0, {{[0-9]+}}(%rsp) -; CHECK-NEXT: vmovaps %zmm0, {{[0-9]+}}(%rsp) -; CHECK-NEXT: vmovaps %zmm0, {{[0-9]+}}(%rsp) -; CHECK-NEXT: vmovaps %zmm0, {{[0-9]+}}(%rsp) -; CHECK-NEXT: vmovaps %zmm0, {{[0-9]+}}(%rsp) -; CHECK-NEXT: vmovaps %zmm0, {{[0-9]+}}(%rsp) -; CHECK-NEXT: vmovaps %zmm0, {{[0-9]+}}(%rsp) -; CHECK-NEXT: vmovaps %zmm0, {{[0-9]+}}(%rsp) -; CHECK-NEXT: vmovaps %zmm0, {{[0-9]+}}(%rsp) -; CHECK-NEXT: tileloadd (%r10,%r9), %tmm1 -; CHECK-NEXT: vmovaps %zmm0, {{[0-9]+}}(%rsp) -; CHECK-NEXT: vmovaps %zmm0, {{[0-9]+}}(%rsp) -; CHECK-NEXT: vmovaps %zmm0, {{[0-9]+}}(%rsp) -; CHECK-NEXT: vmovaps %zmm0, {{[0-9]+}}(%rsp) -; CHECK-NEXT: vmovaps %zmm0, {{[0-9]+}}(%rsp) -; CHECK-NEXT: vmovaps %zmm0, {{[0-9]+}}(%rsp) -; CHECK-NEXT: vmovaps %zmm0, {{[0-9]+}}(%rsp) -; CHECK-NEXT: vmovaps %zmm0, {{[0-9]+}}(%rsp) -; CHECK-NEXT: vmovaps %zmm0, {{[0-9]+}}(%rsp) -; CHECK-NEXT: vmovaps %zmm0, {{[0-9]+}}(%rsp) -; CHECK-NEXT: vmovaps %zmm0, {{[0-9]+}}(%rsp) -; CHECK-NEXT: vmovaps %zmm0, {{[0-9]+}}(%rsp) -; CHECK-NEXT: vmovaps %zmm0, {{[0-9]+}}(%rsp) -; CHECK-NEXT: vmovaps %zmm0, {{[0-9]+}}(%rsp) -; CHECK-NEXT: vmovaps %zmm0, {{[0-9]+}}(%rsp) -; CHECK-NEXT: vmovaps %zmm0, {{[0-9]+}}(%rsp) -; CHECK-NEXT: tileloadd (%r11,%r9), %tmm2 +; CHECK-NEXT: tilezero %tmm1 +; CHECK-NEXT: tilezero %tmm2 ; CHECK-NEXT: tdpbf16ps %tmm2, %tmm1, %tmm0 -; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; CHECK-NEXT: movabsq $64, %rax -; CHECK-NEXT: tilestored %tmm0, 3072(%rsp,%rax) # 1024-byte Folded Spill -; CHECK-NEXT: tileloadd 3072(%rsp,%rax), %tmm1 # 1024-byte Folded Reload -; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload +; CHECK-NEXT: movabsq $64, %rbp +; CHECK-NEXT: tilestored %tmm0, 896(%rsp,%rbp) # 1024-byte Folded Spill +; CHECK-NEXT: tileloadd 896(%rsp,%rbp), %tmm1 # 1024-byte Folded Reload ; CHECK-NEXT: jmp .LBB1_4 %4 = shl i32 %2, 4 %5 = icmp eq i64 0, 0 diff --git a/llvm/test/CodeGen/X86/fshl.ll b/llvm/test/CodeGen/X86/fshl.ll index ec1b8a3..f998128 100644 --- a/llvm/test/CodeGen/X86/fshl.ll +++ b/llvm/test/CodeGen/X86/fshl.ll @@ -335,84 +335,83 @@ define i128 @var_shift_i128(i128 %x, i128 %y, i128 %z) nounwind { ; X86-SLOW-NEXT: pushl %esi ; X86-SLOW-NEXT: andl $-16, %esp ; X86-SLOW-NEXT: subl $32, %esp -; X86-SLOW-NEXT: movl 24(%ebp), %esi +; X86-SLOW-NEXT: movl 24(%ebp), %edi ; X86-SLOW-NEXT: movl 28(%ebp), %eax ; X86-SLOW-NEXT: movl 48(%ebp), %edx ; X86-SLOW-NEXT: movl 56(%ebp), %ecx ; X86-SLOW-NEXT: testb $64, %cl -; X86-SLOW-NEXT: movl 52(%ebp), %edi +; X86-SLOW-NEXT: movl 52(%ebp), %ebx ; X86-SLOW-NEXT: jne .LBB6_1 ; X86-SLOW-NEXT: # %bb.2: ; X86-SLOW-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-SLOW-NEXT: movl %esi, %edx -; X86-SLOW-NEXT: movl 32(%ebp), %esi -; X86-SLOW-NEXT: movl %edi, %ecx -; X86-SLOW-NEXT: movl %eax, %edi +; X86-SLOW-NEXT: movl %edi, %edx +; X86-SLOW-NEXT: movl 32(%ebp), %edi +; X86-SLOW-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SLOW-NEXT: movl %eax, %ebx ; X86-SLOW-NEXT: movl 36(%ebp), %eax ; X86-SLOW-NEXT: jmp .LBB6_3 ; X86-SLOW-NEXT: .LBB6_1: ; X86-SLOW-NEXT: movl 40(%ebp), %ecx ; X86-SLOW-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-SLOW-NEXT: movl 44(%ebp), %ecx +; X86-SLOW-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-SLOW-NEXT: .LBB6_3: -; X86-SLOW-NEXT: movl 56(%ebp), %ebx -; X86-SLOW-NEXT: testb $32, %bl +; X86-SLOW-NEXT: movl 56(%ebp), %ecx +; X86-SLOW-NEXT: testb $32, %cl ; X86-SLOW-NEXT: jne .LBB6_4 ; X86-SLOW-NEXT: # %bb.5: -; X86-SLOW-NEXT: movl %ecx, %ebx ; X86-SLOW-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-SLOW-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SLOW-NEXT: movl %edx, %edi +; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X86-SLOW-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-SLOW-NEXT: jmp .LBB6_6 ; X86-SLOW-NEXT: .LBB6_4: -; X86-SLOW-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-SLOW-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-SLOW-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-SLOW-NEXT: movl %ecx, %edx -; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X86-SLOW-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SLOW-NEXT: movl %edx, %ebx +; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X86-SLOW-NEXT: .LBB6_6: -; X86-SLOW-NEXT: movl %edx, %esi +; X86-SLOW-NEXT: movl %edi, %eax +; X86-SLOW-NEXT: shll %cl, %eax +; X86-SLOW-NEXT: shrl %esi +; X86-SLOW-NEXT: movl %ecx, %edx +; X86-SLOW-NEXT: notb %dl +; X86-SLOW-NEXT: movl %edx, %ecx +; X86-SLOW-NEXT: shrl %cl, %esi +; X86-SLOW-NEXT: orl %eax, %esi +; X86-SLOW-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SLOW-NEXT: movl %ebx, %eax ; X86-SLOW-NEXT: movl 56(%ebp), %ecx -; X86-SLOW-NEXT: shll %cl, %esi -; X86-SLOW-NEXT: movl %ebx, %edi +; X86-SLOW-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-SLOW-NEXT: shll %cl, %eax ; X86-SLOW-NEXT: shrl %edi -; X86-SLOW-NEXT: movl %ecx, %ebx -; X86-SLOW-NEXT: notb %bl -; X86-SLOW-NEXT: movl %ebx, %ecx -; X86-SLOW-NEXT: movb %bl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X86-SLOW-NEXT: movl %edx, %ecx ; X86-SLOW-NEXT: shrl %cl, %edi -; X86-SLOW-NEXT: orl %esi, %edi +; X86-SLOW-NEXT: orl %eax, %edi ; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X86-SLOW-NEXT: movl %esi, %eax ; X86-SLOW-NEXT: movl 56(%ebp), %ecx ; X86-SLOW-NEXT: # kill: def $cl killed $cl killed $ecx ; X86-SLOW-NEXT: shll %cl, %eax -; X86-SLOW-NEXT: shrl %edx -; X86-SLOW-NEXT: movl %ebx, %ecx -; X86-SLOW-NEXT: shrl %cl, %edx -; X86-SLOW-NEXT: orl %eax, %edx -; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X86-SLOW-NEXT: movl %ebx, %eax +; X86-SLOW-NEXT: shrl %ebx +; X86-SLOW-NEXT: movl %edx, %ecx +; X86-SLOW-NEXT: shrl %cl, %ebx +; X86-SLOW-NEXT: orl %eax, %ebx ; X86-SLOW-NEXT: movl 56(%ebp), %ecx ; X86-SLOW-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X86-SLOW-NEXT: shll %cl, %eax ; X86-SLOW-NEXT: shrl %esi -; X86-SLOW-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload +; X86-SLOW-NEXT: movl %edx, %ecx ; X86-SLOW-NEXT: shrl %cl, %esi ; X86-SLOW-NEXT: orl %eax, %esi -; X86-SLOW-NEXT: movl 56(%ebp), %ecx -; X86-SLOW-NEXT: # kill: def $cl killed $cl killed $ecx -; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-SLOW-NEXT: shll %cl, %eax -; X86-SLOW-NEXT: shrl %ebx -; X86-SLOW-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X86-SLOW-NEXT: shrl %cl, %ebx -; X86-SLOW-NEXT: orl %eax, %ebx ; X86-SLOW-NEXT: movl 8(%ebp), %eax -; X86-SLOW-NEXT: movl %ebx, 12(%eax) -; X86-SLOW-NEXT: movl %esi, 8(%eax) -; X86-SLOW-NEXT: movl %edx, 4(%eax) -; X86-SLOW-NEXT: movl %edi, (%eax) +; X86-SLOW-NEXT: movl %esi, 12(%eax) +; X86-SLOW-NEXT: movl %ebx, 8(%eax) +; X86-SLOW-NEXT: movl %edi, 4(%eax) +; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-SLOW-NEXT: movl %ecx, (%eax) ; X86-SLOW-NEXT: leal -12(%ebp), %esp ; X86-SLOW-NEXT: popl %esi ; X86-SLOW-NEXT: popl %edi diff --git a/llvm/test/CodeGen/X86/fshr.ll b/llvm/test/CodeGen/X86/fshr.ll index 544ab7f..c307833 100644 --- a/llvm/test/CodeGen/X86/fshr.ll +++ b/llvm/test/CodeGen/X86/fshr.ll @@ -322,79 +322,79 @@ define i128 @var_shift_i128(i128 %x, i128 %y, i128 %z) nounwind { ; X86-SLOW-NEXT: subl $16, %esp ; X86-SLOW-NEXT: movl 24(%ebp), %edx ; X86-SLOW-NEXT: movl 28(%ebp), %esi -; X86-SLOW-NEXT: movl 48(%ebp), %ebx +; X86-SLOW-NEXT: movl 48(%ebp), %edi ; X86-SLOW-NEXT: movl 56(%ebp), %eax ; X86-SLOW-NEXT: testb $64, %al -; X86-SLOW-NEXT: movl 52(%ebp), %edi +; X86-SLOW-NEXT: movl 52(%ebp), %eax ; X86-SLOW-NEXT: je .LBB6_1 ; X86-SLOW-NEXT: # %bb.2: -; X86-SLOW-NEXT: movl %ebx, (%esp) # 4-byte Spill -; X86-SLOW-NEXT: movl %edx, %ebx +; X86-SLOW-NEXT: movl %edi, (%esp) # 4-byte Spill +; X86-SLOW-NEXT: movl %edx, %edi ; X86-SLOW-NEXT: movl 32(%ebp), %edx -; X86-SLOW-NEXT: movl %edi, %eax -; X86-SLOW-NEXT: movl %esi, %edi +; X86-SLOW-NEXT: movl %eax, %ecx +; X86-SLOW-NEXT: movl %esi, %eax ; X86-SLOW-NEXT: movl 36(%ebp), %esi ; X86-SLOW-NEXT: jmp .LBB6_3 ; X86-SLOW-NEXT: .LBB6_1: -; X86-SLOW-NEXT: movl 40(%ebp), %eax -; X86-SLOW-NEXT: movl %eax, (%esp) # 4-byte Spill -; X86-SLOW-NEXT: movl 44(%ebp), %eax +; X86-SLOW-NEXT: movl 40(%ebp), %ecx +; X86-SLOW-NEXT: movl %ecx, (%esp) # 4-byte Spill +; X86-SLOW-NEXT: movl 44(%ebp), %ecx ; X86-SLOW-NEXT: .LBB6_3: -; X86-SLOW-NEXT: movl 56(%ebp), %ecx -; X86-SLOW-NEXT: testb $32, %cl +; X86-SLOW-NEXT: movl 56(%ebp), %ebx +; X86-SLOW-NEXT: testb $32, %bl ; X86-SLOW-NEXT: je .LBB6_4 ; X86-SLOW-NEXT: # %bb.5: -; X86-SLOW-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SLOW-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-SLOW-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SLOW-NEXT: movl %ecx, %ebx ; X86-SLOW-NEXT: jmp .LBB6_6 ; X86-SLOW-NEXT: .LBB6_4: ; X86-SLOW-NEXT: movl %edx, %esi +; X86-SLOW-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-SLOW-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-SLOW-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-SLOW-NEXT: movl %eax, %ebx -; X86-SLOW-NEXT: movl (%esp), %eax # 4-byte Reload +; X86-SLOW-NEXT: movl %ecx, %edi +; X86-SLOW-NEXT: movl (%esp), %ebx # 4-byte Reload ; X86-SLOW-NEXT: .LBB6_6: -; X86-SLOW-NEXT: shrl %cl, %eax -; X86-SLOW-NEXT: movl %eax, %edx -; X86-SLOW-NEXT: movl %ecx, %eax -; X86-SLOW-NEXT: notb %al -; X86-SLOW-NEXT: movl %ebx, %edi -; X86-SLOW-NEXT: addl %ebx, %ebx -; X86-SLOW-NEXT: movl %eax, %ecx -; X86-SLOW-NEXT: shll %cl, %ebx -; X86-SLOW-NEXT: orl %edx, %ebx -; X86-SLOW-NEXT: movl %ebx, (%esp) # 4-byte Spill ; X86-SLOW-NEXT: movl 56(%ebp), %ecx -; X86-SLOW-NEXT: # kill: def $cl killed $cl killed $ecx -; X86-SLOW-NEXT: shrl %cl, %edi -; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X86-SLOW-NEXT: leal (%ebx,%ebx), %edx -; X86-SLOW-NEXT: movl %eax, %ecx -; X86-SLOW-NEXT: shll %cl, %edx -; X86-SLOW-NEXT: orl %edi, %edx +; X86-SLOW-NEXT: shrl %cl, %ebx +; X86-SLOW-NEXT: movl %ecx, %edx +; X86-SLOW-NEXT: notb %dl +; X86-SLOW-NEXT: movl %edi, %eax +; X86-SLOW-NEXT: addl %edi, %edi +; X86-SLOW-NEXT: movl %edx, %ecx +; X86-SLOW-NEXT: shll %cl, %edi +; X86-SLOW-NEXT: orl %ebx, %edi +; X86-SLOW-NEXT: movl %edi, (%esp) # 4-byte Spill ; X86-SLOW-NEXT: movl 56(%ebp), %ecx ; X86-SLOW-NEXT: # kill: def $cl killed $cl killed $ecx -; X86-SLOW-NEXT: shrl %cl, %ebx -; X86-SLOW-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SLOW-NEXT: shrl %cl, %eax ; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X86-SLOW-NEXT: leal (%edi,%edi), %ebx -; X86-SLOW-NEXT: movl %eax, %ecx +; X86-SLOW-NEXT: movl %edx, %ecx ; X86-SLOW-NEXT: shll %cl, %ebx -; X86-SLOW-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X86-SLOW-NEXT: orl %eax, %ebx ; X86-SLOW-NEXT: movl 56(%ebp), %ecx ; X86-SLOW-NEXT: # kill: def $cl killed $cl killed $ecx ; X86-SLOW-NEXT: shrl %cl, %edi +; X86-SLOW-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-SLOW-NEXT: leal (%eax,%eax), %edi +; X86-SLOW-NEXT: movl %edx, %ecx +; X86-SLOW-NEXT: shll %cl, %edi +; X86-SLOW-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X86-SLOW-NEXT: movl 56(%ebp), %ecx +; X86-SLOW-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-SLOW-NEXT: shrl %cl, %eax ; X86-SLOW-NEXT: addl %esi, %esi -; X86-SLOW-NEXT: movl %eax, %ecx +; X86-SLOW-NEXT: movl %edx, %ecx ; X86-SLOW-NEXT: shll %cl, %esi -; X86-SLOW-NEXT: orl %edi, %esi -; X86-SLOW-NEXT: movl 8(%ebp), %ecx -; X86-SLOW-NEXT: movl %esi, 12(%ecx) -; X86-SLOW-NEXT: movl %ebx, 8(%ecx) -; X86-SLOW-NEXT: movl %edx, 4(%ecx) -; X86-SLOW-NEXT: movl (%esp), %eax # 4-byte Reload -; X86-SLOW-NEXT: movl %eax, (%ecx) -; X86-SLOW-NEXT: movl %ecx, %eax +; X86-SLOW-NEXT: orl %eax, %esi +; X86-SLOW-NEXT: movl 8(%ebp), %eax +; X86-SLOW-NEXT: movl %esi, 12(%eax) +; X86-SLOW-NEXT: movl %edi, 8(%eax) +; X86-SLOW-NEXT: movl %ebx, 4(%eax) +; X86-SLOW-NEXT: movl (%esp), %ecx # 4-byte Reload +; X86-SLOW-NEXT: movl %ecx, (%eax) ; X86-SLOW-NEXT: leal -12(%ebp), %esp ; X86-SLOW-NEXT: popl %esi ; X86-SLOW-NEXT: popl %edi diff --git a/llvm/test/CodeGen/X86/pr161693.ll b/llvm/test/CodeGen/X86/pr161693.ll new file mode 100644 index 0000000..de8188f --- /dev/null +++ b/llvm/test/CodeGen/X86/pr161693.ll @@ -0,0 +1,40 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s + +define void @PR161693() #0 { +; CHECK-LABEL: PR161693: +; CHECK: # %bb.0: # %start +; CHECK-NEXT: movzbl (%rax), %eax +; CHECK-NEXT: andb $-33, %al +; CHECK-NEXT: addb $-71, %al +; CHECK-NEXT: .p2align 4 +; CHECK-NEXT: .LBB0_1: # %loop +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: cmpb $-6, %al +; CHECK-NEXT: setb %cl +; CHECK-NEXT: leal (%rcx,%rcx), %edx +; CHECK-NEXT: orb %cl, %dl +; CHECK-NEXT: leal (,%rdx,4), %ecx +; CHECK-NEXT: orb %dl, %cl +; CHECK-NEXT: je .LBB0_1 +; CHECK-NEXT: # %bb.2: # %exit +; CHECK-NEXT: retq +start: + br label %loop + +loop: + %.val.i.i89 = load <16 x i8>, ptr poison, align 1 + %.not49.i = icmp ult <16 x i8> zeroinitializer, splat (i8 -10) + %i = and <16 x i8> %.val.i.i89, splat (i8 -33) + %i1 = add <16 x i8> %i, splat (i8 -71) + %.not51.i = icmp ult <16 x i8> %i1, splat (i8 -6) + %.not46.i = and <16 x i1> %.not49.i, %.not51.i + %i2 = bitcast <16 x i1> %.not46.i to i16 + %_0.i = icmp eq i16 %i2, 0 + br i1 %_0.i, label %loop, label %exit + +exit: + ret void +} + +attributes #0 = { "target-features"="+soft-float" } diff --git a/llvm/test/CodeGen/X86/sbb.ll b/llvm/test/CodeGen/X86/sbb.ll index 78d609d..f5a3468 100644 --- a/llvm/test/CodeGen/X86/sbb.ll +++ b/llvm/test/CodeGen/X86/sbb.ll @@ -365,3 +365,32 @@ define i32 @uge_sext_add(i32 %0, i32 %1, i32 %2) { %6 = add nsw i32 %5, %0 ret i32 %6 } + +define i32 @sub_sub_ugt(i32 %a, i32 %b) { +; CHECK-LABEL: sub_sub_ugt: +; CHECK: # %bb.0: +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: cmpl %edi, %esi +; CHECK-NEXT: sbbl %esi, %eax +; CHECK-NEXT: retq + %cmp = icmp ugt i32 %a, %b + %conv = zext i1 %cmp to i32 + %sub = sub i32 %a, %b + %res = sub i32 %sub, %conv + ret i32 %res +} + +define i32 @sub_sub_ult(i32 %a, i32 %b) { +; CHECK-LABEL: sub_sub_ult: +; CHECK: # %bb.0: +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: cmpl %edi, %esi +; CHECK-NEXT: sbbl %esi, %eax +; CHECK-NEXT: retq + %cmp = icmp ult i32 %b, %a + %conv = zext i1 %cmp to i32 + %sub = sub i32 %a, %b + %res = sub i32 %sub, %conv + ret i32 %res +} + diff --git a/llvm/test/CodeGen/X86/shift-i128.ll b/llvm/test/CodeGen/X86/shift-i128.ll index 7462c77..049ee47 100644 --- a/llvm/test/CodeGen/X86/shift-i128.ll +++ b/llvm/test/CodeGen/X86/shift-i128.ll @@ -613,8 +613,7 @@ define void @test_shl_v2i128(<2 x i128> %x, <2 x i128> %a, ptr nocapture %r) nou ; i686-NEXT: shldl %cl, %esi, %ebx ; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; i686-NEXT: movl %edi, %esi -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; i686-NEXT: movl %eax, %ecx +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; i686-NEXT: shll %cl, %esi ; i686-NEXT: shldl %cl, %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; i686-NEXT: negl %edx |