Diffstat (limited to 'llvm/test/CodeGen/X86')
99 files changed, 3778 insertions, 2702 deletions
diff --git a/llvm/test/CodeGen/X86/2007-08-09-IllegalX86-64Asm.ll b/llvm/test/CodeGen/X86/2007-08-09-IllegalX86-64Asm.ll index 28b4541..7bdc4e1 100644 --- a/llvm/test/CodeGen/X86/2007-08-09-IllegalX86-64Asm.ll +++ b/llvm/test/CodeGen/X86/2007-08-09-IllegalX86-64Asm.ll @@ -44,7 +44,7 @@ define ptr @ubyte_divmod(ptr %a, ptr %b) { ; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rsi ; CHECK-NEXT: callq __ubyte_convert_to_ctype ; CHECK-NEXT: testl %eax, %eax -; CHECK-NEXT: js LBB0_6 +; CHECK-NEXT: js LBB0_4 ; CHECK-NEXT: ## %bb.1: ## %cond_next.i ; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rsi ; CHECK-NEXT: movq %rbx, %rdi @@ -53,84 +53,81 @@ define ptr @ubyte_divmod(ptr %a, ptr %b) { ; CHECK-NEXT: sarl $31, %ecx ; CHECK-NEXT: andl %eax, %ecx ; CHECK-NEXT: cmpl $-2, %ecx -; CHECK-NEXT: je LBB0_10 +; CHECK-NEXT: je LBB0_8 ; CHECK-NEXT: ## %bb.2: ## %cond_next.i ; CHECK-NEXT: cmpl $-1, %ecx -; CHECK-NEXT: jne LBB0_3 -; CHECK-NEXT: LBB0_8: ## %bb4 +; CHECK-NEXT: jne LBB0_6 +; CHECK-NEXT: LBB0_3: ## %bb4 ; CHECK-NEXT: movq _PyArray_API@GOTPCREL(%rip), %rax ; CHECK-NEXT: movq (%rax), %rax ; CHECK-NEXT: movq 16(%rax), %rax -; CHECK-NEXT: jmp LBB0_9 -; CHECK-NEXT: LBB0_6: ## %_ubyte_convert2_to_ctypes.exit +; CHECK-NEXT: jmp LBB0_10 +; CHECK-NEXT: LBB0_4: ## %_ubyte_convert2_to_ctypes.exit ; CHECK-NEXT: cmpl $-2, %eax -; CHECK-NEXT: je LBB0_10 -; CHECK-NEXT: ## %bb.7: ## %_ubyte_convert2_to_ctypes.exit -; CHECK-NEXT: cmpl $-1, %eax ; CHECK-NEXT: je LBB0_8 -; CHECK-NEXT: LBB0_3: ## %bb35 +; CHECK-NEXT: ## %bb.5: ## %_ubyte_convert2_to_ctypes.exit +; CHECK-NEXT: cmpl $-1, %eax +; CHECK-NEXT: je LBB0_3 +; CHECK-NEXT: LBB0_6: ## %bb35 ; CHECK-NEXT: movq _PyUFunc_API@GOTPCREL(%rip), %r14 ; CHECK-NEXT: movq (%r14), %rax ; CHECK-NEXT: callq *216(%rax) ; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %edx ; CHECK-NEXT: testb %dl, %dl -; CHECK-NEXT: je LBB0_4 -; CHECK-NEXT: ## %bb.12: ## %cond_false.i -; CHECK-NEXT: setne %dil +; CHECK-NEXT: je LBB0_11 +; CHECK-NEXT: ## %bb.7: ## %cond_false.i ; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %esi ; CHECK-NEXT: movzbl %sil, %ecx ; CHECK-NEXT: movl %ecx, %eax ; CHECK-NEXT: divb %dl ; CHECK-NEXT: movl %eax, %r15d ; CHECK-NEXT: testb %cl, %cl -; CHECK-NEXT: setne %al -; CHECK-NEXT: testb %dil, %al -; CHECK-NEXT: jne LBB0_5 -; CHECK-NEXT: LBB0_13: ## %cond_true.i200 -; CHECK-NEXT: testb %dl, %dl -; CHECK-NEXT: jne LBB0_15 -; CHECK-NEXT: ## %bb.14: ## %cond_true14.i -; CHECK-NEXT: movl $4, %edi -; CHECK-NEXT: callq _feraiseexcept -; CHECK-NEXT: LBB0_15: ## %ubyte_ctype_remainder.exit -; CHECK-NEXT: xorl %ebx, %ebx -; CHECK-NEXT: jmp LBB0_16 -; CHECK-NEXT: LBB0_10: ## %bb17 +; CHECK-NEXT: jne LBB0_12 +; CHECK-NEXT: jmp LBB0_14 +; CHECK-NEXT: LBB0_8: ## %bb17 ; CHECK-NEXT: callq _PyErr_Occurred ; CHECK-NEXT: testq %rax, %rax -; CHECK-NEXT: jne LBB0_23 -; CHECK-NEXT: ## %bb.11: ## %cond_next +; CHECK-NEXT: jne LBB0_27 +; CHECK-NEXT: ## %bb.9: ## %cond_next ; CHECK-NEXT: movq _PyArray_API@GOTPCREL(%rip), %rax ; CHECK-NEXT: movq (%rax), %rax ; CHECK-NEXT: movq 80(%rax), %rax -; CHECK-NEXT: LBB0_9: ## %bb4 +; CHECK-NEXT: LBB0_10: ## %bb4 ; CHECK-NEXT: movq 96(%rax), %rax ; CHECK-NEXT: movq %r14, %rdi ; CHECK-NEXT: movq %rbx, %rsi ; CHECK-NEXT: callq *40(%rax) -; CHECK-NEXT: jmp LBB0_24 -; CHECK-NEXT: LBB0_4: ## %cond_true.i +; CHECK-NEXT: jmp LBB0_28 +; CHECK-NEXT: LBB0_11: ## %cond_true.i ; CHECK-NEXT: movl $4, %edi ; CHECK-NEXT: callq _feraiseexcept ; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %edx ; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %esi +; CHECK-NEXT: xorl %r15d, %r15d ; CHECK-NEXT: testb %sil, %sil -; 
CHECK-NEXT: sete %al +; CHECK-NEXT: je LBB0_14 +; CHECK-NEXT: LBB0_12: ## %cond_false.i ; CHECK-NEXT: testb %dl, %dl -; CHECK-NEXT: sete %cl -; CHECK-NEXT: xorl %r15d, %r15d -; CHECK-NEXT: orb %al, %cl -; CHECK-NEXT: jne LBB0_13 -; CHECK-NEXT: LBB0_5: ## %cond_next17.i +; CHECK-NEXT: je LBB0_14 +; CHECK-NEXT: ## %bb.13: ## %cond_next17.i ; CHECK-NEXT: movzbl %sil, %eax ; CHECK-NEXT: divb %dl ; CHECK-NEXT: movzbl %ah, %ebx -; CHECK-NEXT: LBB0_16: ## %ubyte_ctype_remainder.exit +; CHECK-NEXT: jmp LBB0_18 +; CHECK-NEXT: LBB0_14: ## %cond_true.i200 +; CHECK-NEXT: testb %dl, %dl +; CHECK-NEXT: jne LBB0_17 +; CHECK-NEXT: ## %bb.16: ## %cond_true14.i +; CHECK-NEXT: movl $4, %edi +; CHECK-NEXT: callq _feraiseexcept +; CHECK-NEXT: LBB0_17: ## %ubyte_ctype_remainder.exit +; CHECK-NEXT: xorl %ebx, %ebx +; CHECK-NEXT: LBB0_18: ## %ubyte_ctype_remainder.exit ; CHECK-NEXT: movq (%r14), %rax ; CHECK-NEXT: callq *224(%rax) ; CHECK-NEXT: testl %eax, %eax -; CHECK-NEXT: je LBB0_19 -; CHECK-NEXT: ## %bb.17: ## %cond_true61 +; CHECK-NEXT: je LBB0_21 +; CHECK-NEXT: ## %bb.19: ## %cond_true61 ; CHECK-NEXT: movl %eax, %ebp ; CHECK-NEXT: movq (%r14), %rax ; CHECK-NEXT: movq _.str5@GOTPCREL(%rip), %rdi @@ -139,8 +136,8 @@ define ptr @ubyte_divmod(ptr %a, ptr %b) { ; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rcx ; CHECK-NEXT: callq *200(%rax) ; CHECK-NEXT: testl %eax, %eax -; CHECK-NEXT: js LBB0_23 -; CHECK-NEXT: ## %bb.18: ## %cond_next73 +; CHECK-NEXT: js LBB0_27 +; CHECK-NEXT: ## %bb.20: ## %cond_next73 ; CHECK-NEXT: movl $1, {{[0-9]+}}(%rsp) ; CHECK-NEXT: movq (%r14), %rax ; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rsi @@ -149,13 +146,13 @@ define ptr @ubyte_divmod(ptr %a, ptr %b) { ; CHECK-NEXT: movl %ebp, %edx ; CHECK-NEXT: callq *232(%rax) ; CHECK-NEXT: testl %eax, %eax -; CHECK-NEXT: jne LBB0_23 -; CHECK-NEXT: LBB0_19: ## %cond_next89 +; CHECK-NEXT: jne LBB0_27 +; CHECK-NEXT: LBB0_21: ## %cond_next89 ; CHECK-NEXT: movl $2, %edi ; CHECK-NEXT: callq _PyTuple_New ; CHECK-NEXT: testq %rax, %rax -; CHECK-NEXT: je LBB0_23 -; CHECK-NEXT: ## %bb.20: ## %cond_next97 +; CHECK-NEXT: je LBB0_27 +; CHECK-NEXT: ## %bb.22: ## %cond_next97 ; CHECK-NEXT: movq %rax, %r14 ; CHECK-NEXT: movq _PyArray_API@GOTPCREL(%rip), %r12 ; CHECK-NEXT: movq (%r12), %rax @@ -163,8 +160,8 @@ define ptr @ubyte_divmod(ptr %a, ptr %b) { ; CHECK-NEXT: xorl %esi, %esi ; CHECK-NEXT: callq *304(%rdi) ; CHECK-NEXT: testq %rax, %rax -; CHECK-NEXT: je LBB0_21 -; CHECK-NEXT: ## %bb.25: ## %cond_next135 +; CHECK-NEXT: je LBB0_25 +; CHECK-NEXT: ## %bb.23: ## %cond_next135 ; CHECK-NEXT: movb %r15b, 16(%rax) ; CHECK-NEXT: movq %rax, 24(%r14) ; CHECK-NEXT: movq (%r12), %rax @@ -172,22 +169,22 @@ define ptr @ubyte_divmod(ptr %a, ptr %b) { ; CHECK-NEXT: xorl %esi, %esi ; CHECK-NEXT: callq *304(%rdi) ; CHECK-NEXT: testq %rax, %rax -; CHECK-NEXT: je LBB0_21 -; CHECK-NEXT: ## %bb.26: ## %cond_next182 +; CHECK-NEXT: je LBB0_25 +; CHECK-NEXT: ## %bb.24: ## %cond_next182 ; CHECK-NEXT: movb %bl, 16(%rax) ; CHECK-NEXT: movq %rax, 32(%r14) ; CHECK-NEXT: movq %r14, %rax -; CHECK-NEXT: jmp LBB0_24 -; CHECK-NEXT: LBB0_21: ## %cond_true113 +; CHECK-NEXT: jmp LBB0_28 +; CHECK-NEXT: LBB0_25: ## %cond_true113 ; CHECK-NEXT: decq (%r14) -; CHECK-NEXT: jne LBB0_23 -; CHECK-NEXT: ## %bb.22: ## %cond_true126 +; CHECK-NEXT: jne LBB0_27 +; CHECK-NEXT: ## %bb.26: ## %cond_true126 ; CHECK-NEXT: movq 8(%r14), %rax ; CHECK-NEXT: movq %r14, %rdi ; CHECK-NEXT: callq *48(%rax) -; CHECK-NEXT: LBB0_23: ## %UnifiedReturnBlock +; CHECK-NEXT: LBB0_27: ## %UnifiedReturnBlock ; CHECK-NEXT: xorl %eax, %eax 
-; CHECK-NEXT: LBB0_24: ## %UnifiedReturnBlock +; CHECK-NEXT: LBB0_28: ## %UnifiedReturnBlock ; CHECK-NEXT: addq $32, %rsp ; CHECK-NEXT: popq %rbx ; CHECK-NEXT: popq %r12 diff --git a/llvm/test/CodeGen/X86/GlobalISel/add-scalar.ll b/llvm/test/CodeGen/X86/GlobalISel/add-scalar.ll index 7bde1b7..7cdfd51 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/add-scalar.ll +++ b/llvm/test/CodeGen/X86/GlobalISel/add-scalar.ll @@ -7,12 +7,15 @@ define i128 @test_add_i128(i128 %arg1, i128 %arg2) nounwind { ; X64: # %bb.0: ; X64-NEXT: movq %rdx, %rax ; X64-NEXT: addq %rdi, %rax +; X64-NEXT: setb %dl +; X64-NEXT: cmpb $1, %dl ; X64-NEXT: adcq %rsi, %rcx ; X64-NEXT: movq %rcx, %rdx ; X64-NEXT: retq ; ; X86-LABEL: test_add_i128: ; X86: # %bb.0: +; X86-NEXT: pushl %ebx ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax @@ -21,8 +24,14 @@ define i128 @test_add_i128(i128 %arg1, i128 %arg2) nounwind { ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi ; X86-NEXT: addl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: setb %bl +; X86-NEXT: cmpb $1, %bl ; X86-NEXT: adcl {{[0-9]+}}(%esp), %edx +; X86-NEXT: setb %bl +; X86-NEXT: cmpb $1, %bl ; X86-NEXT: adcl {{[0-9]+}}(%esp), %esi +; X86-NEXT: setb %bl +; X86-NEXT: cmpb $1, %bl ; X86-NEXT: adcl {{[0-9]+}}(%esp), %edi ; X86-NEXT: movl %ecx, (%eax) ; X86-NEXT: movl %edx, 4(%eax) @@ -30,6 +39,7 @@ define i128 @test_add_i128(i128 %arg1, i128 %arg2) nounwind { ; X86-NEXT: movl %edi, 12(%eax) ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx ; X86-NEXT: retl %ret = add i128 %arg1, %arg2 ret i128 %ret @@ -46,6 +56,8 @@ define i64 @test_add_i64(i64 %arg1, i64 %arg2) { ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: addl {{[0-9]+}}(%esp), %eax +; X86-NEXT: setb %cl +; X86-NEXT: cmpb $1, %cl ; X86-NEXT: adcl {{[0-9]+}}(%esp), %edx ; X86-NEXT: retl %ret = add i64 %arg1, %arg2 diff --git a/llvm/test/CodeGen/X86/GlobalISel/legalize-add.mir b/llvm/test/CodeGen/X86/GlobalISel/legalize-add.mir index ec9db78..dae2ad6 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/legalize-add.mir +++ b/llvm/test/CodeGen/X86/GlobalISel/legalize-add.mir @@ -157,8 +157,8 @@ body: | ; X86: [[COPY:%[0-9]+]]:_(s64) = COPY $rdx ; X86-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) ; X86-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; X86-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]] - ; X86-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] + ; X86-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s8) = G_UADDO [[UV]], [[UV2]] + ; X86-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s8) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] ; X86-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) ; X86-NEXT: $rax = COPY [[MV]](s64) ; X86-NEXT: RET 0 @@ -192,8 +192,8 @@ body: | ; X86-NEXT: [[DEF1:%[0-9]+]]:_(s64) = IMPLICIT_DEF ; X86-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](s64) ; X86-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](s64) - ; X86-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]] - ; X86-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] + ; X86-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s8) = G_UADDO [[UV]], [[UV2]] + ; X86-NEXT: [[UADDE:%[0-9]+]]:_(s32), 
[[UADDE1:%[0-9]+]]:_(s8) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] ; X86-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) ; X86-NEXT: $rax = COPY [[MV]](s64) ; X86-NEXT: RET 0 @@ -219,8 +219,8 @@ body: | ; X64-NEXT: [[DEF1:%[0-9]+]]:_(s128) = IMPLICIT_DEF ; X64-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](s128) ; X64-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF1]](s128) - ; X64-NEXT: [[UADDO:%[0-9]+]]:_(s64), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]] - ; X64-NEXT: [[UADDE:%[0-9]+]]:_(s64), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] + ; X64-NEXT: [[UADDO:%[0-9]+]]:_(s64), [[UADDO1:%[0-9]+]]:_(s8) = G_UADDO [[UV]], [[UV2]] + ; X64-NEXT: [[UADDE:%[0-9]+]]:_(s64), [[UADDE1:%[0-9]+]]:_(s8) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] ; X64-NEXT: $rax = COPY [[UADDO]](s64) ; X64-NEXT: $rdx = COPY [[UADDE]](s64) ; X64-NEXT: RET 0 @@ -230,10 +230,10 @@ body: | ; X86-NEXT: [[DEF1:%[0-9]+]]:_(s128) = IMPLICIT_DEF ; X86-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](s128) ; X86-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](s128) - ; X86-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV4]] - ; X86-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV5]], [[UADDO1]] - ; X86-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV2]], [[UV6]], [[UADDE1]] - ; X86-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UV3]], [[UV7]], [[UADDE3]] + ; X86-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s8) = G_UADDO [[UV]], [[UV4]] + ; X86-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s8) = G_UADDE [[UV1]], [[UV5]], [[UADDO1]] + ; X86-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s8) = G_UADDE [[UV2]], [[UV6]], [[UADDE1]] + ; X86-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s8) = G_UADDE [[UV3]], [[UV7]], [[UADDE3]] ; X86-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) ; X86-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE2]](s32), [[UADDE4]](s32) ; X86-NEXT: $rax = COPY [[MV]](s64) diff --git a/llvm/test/CodeGen/X86/GlobalISel/legalize-leading-zeros.mir b/llvm/test/CodeGen/X86/GlobalISel/legalize-leading-zeros.mir index 19fe5b8..470a30fd 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/legalize-leading-zeros.mir +++ b/llvm/test/CodeGen/X86/GlobalISel/legalize-leading-zeros.mir @@ -25,6 +25,7 @@ body: | ; X64-NEXT: [[SUB:%[0-9]+]]:_(s64) = G_SUB [[CTLZ]], [[C1]] ; X64-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[SUB]], [[C]] ; X64-NEXT: RET 0, implicit [[AND1]](s64) + ; ; X86-LABEL: name: test_ctlz35 ; X86: [[COPY:%[0-9]+]]:_(s64) = COPY $rdx ; X86-NEXT: [[TRUNC:%[0-9]+]]:_(s35) = G_TRUNC [[COPY]](s64) @@ -46,12 +47,15 @@ body: | ; X86-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C2]](s32), [[C]](s32) ; X86-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MV]](s64) ; X86-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MV1]](s64) - ; X86-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV6]], [[UV8]] - ; X86-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV7]], [[UV9]], [[USUBO1]] + ; X86-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s8) = G_USUBO [[UV6]], [[UV8]] + ; X86-NEXT: 
[[TRUNC1:%[0-9]+]]:_(s1) = G_TRUNC [[USUBO1]](s8) + ; X86-NEXT: [[ZEXT2:%[0-9]+]]:_(s8) = G_ZEXT [[TRUNC1]](s1) + ; X86-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s8) = G_USUBE [[UV7]], [[UV9]], [[ZEXT2]] + ; X86-NEXT: [[TRUNC2:%[0-9]+]]:_(s1) = G_TRUNC [[USUBE1]](s8) ; X86-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32) - ; X86-NEXT: [[TRUNC1:%[0-9]+]]:_(s35) = G_TRUNC [[MV2]](s64) - ; X86-NEXT: [[ZEXT2:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC1]](s35) - ; X86-NEXT: RET 0, implicit [[ZEXT2]](s64) + ; X86-NEXT: [[TRUNC3:%[0-9]+]]:_(s35) = G_TRUNC [[MV2]](s64) + ; X86-NEXT: [[ZEXT3:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC3]](s35) + ; X86-NEXT: RET 0, implicit [[ZEXT3]](s64) %0(s64) = COPY $rdx %1:_(s35) = G_TRUNC %0(s64) %2:_(s35) = G_CTLZ %1 @@ -97,6 +101,7 @@ body: | ; X64-NEXT: [[CTLZ:%[0-9]+]]:_(s64) = G_CTLZ [[DEF]](s64) ; X64-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY [[CTLZ]](s64) ; X64-NEXT: RET 0, implicit [[COPY]](s64) + ; ; X86-LABEL: name: test_ctlz64 ; X86: [[DEF:%[0-9]+]]:_(s64) = IMPLICIT_DEF ; X86-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](s64) diff --git a/llvm/test/CodeGen/X86/GlobalISel/legalize-sub.mir b/llvm/test/CodeGen/X86/GlobalISel/legalize-sub.mir index ee2b9ee..ac3bf33 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/legalize-sub.mir +++ b/llvm/test/CodeGen/X86/GlobalISel/legalize-sub.mir @@ -157,8 +157,8 @@ body: | ; X86: [[COPY:%[0-9]+]]:_(s64) = COPY $rdx ; X86-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) ; X86-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; X86-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV]], [[UV2]] - ; X86-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV1]], [[UV3]], [[USUBO1]] + ; X86-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s8) = G_USUBO [[UV]], [[UV2]] + ; X86-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s8) = G_USUBE [[UV1]], [[UV3]], [[USUBO1]] ; X86-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32) ; X86-NEXT: $rax = COPY [[MV]](s64) ; X86-NEXT: RET 0 @@ -192,8 +192,8 @@ body: | ; X86-NEXT: [[DEF1:%[0-9]+]]:_(s64) = IMPLICIT_DEF ; X86-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](s64) ; X86-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](s64) - ; X86-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV]], [[UV2]] - ; X86-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV1]], [[UV3]], [[USUBO1]] + ; X86-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s8) = G_USUBO [[UV]], [[UV2]] + ; X86-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s8) = G_USUBE [[UV1]], [[UV3]], [[USUBO1]] ; X86-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32) ; X86-NEXT: $rax = COPY [[MV]](s64) ; X86-NEXT: RET 0 @@ -219,8 +219,8 @@ body: | ; X64-NEXT: [[DEF1:%[0-9]+]]:_(s128) = IMPLICIT_DEF ; X64-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](s128) ; X64-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF1]](s128) - ; X64-NEXT: [[USUBO:%[0-9]+]]:_(s64), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV]], [[UV2]] - ; X64-NEXT: [[USUBE:%[0-9]+]]:_(s64), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV1]], [[UV3]], [[USUBO1]] + ; X64-NEXT: [[USUBO:%[0-9]+]]:_(s64), [[USUBO1:%[0-9]+]]:_(s8) = G_USUBO [[UV]], [[UV2]] + ; X64-NEXT: [[USUBE:%[0-9]+]]:_(s64), 
[[USUBE1:%[0-9]+]]:_(s8) = G_USUBE [[UV1]], [[UV3]], [[USUBO1]] ; X64-NEXT: $rax = COPY [[USUBO]](s64) ; X64-NEXT: $rdx = COPY [[USUBE]](s64) ; X64-NEXT: RET 0 @@ -230,10 +230,10 @@ body: | ; X86-NEXT: [[DEF1:%[0-9]+]]:_(s128) = IMPLICIT_DEF ; X86-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](s128) ; X86-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](s128) - ; X86-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV]], [[UV4]] - ; X86-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV1]], [[UV5]], [[USUBO1]] - ; X86-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV2]], [[UV6]], [[USUBE1]] - ; X86-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[UV3]], [[UV7]], [[USUBE3]] + ; X86-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s8) = G_USUBO [[UV]], [[UV4]] + ; X86-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s8) = G_USUBE [[UV1]], [[UV5]], [[USUBO1]] + ; X86-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s8) = G_USUBE [[UV2]], [[UV6]], [[USUBE1]] + ; X86-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s8) = G_USUBE [[UV3]], [[UV7]], [[USUBE3]] ; X86-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32) ; X86-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBE2]](s32), [[USUBE4]](s32) ; X86-NEXT: $rax = COPY [[MV]](s64) diff --git a/llvm/test/CodeGen/X86/GlobalISel/legalize-trailing-zeros-undef.mir b/llvm/test/CodeGen/X86/GlobalISel/legalize-trailing-zeros-undef.mir index 9807d13..57e729f 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/legalize-trailing-zeros-undef.mir +++ b/llvm/test/CodeGen/X86/GlobalISel/legalize-trailing-zeros-undef.mir @@ -32,8 +32,8 @@ body: | ; X86-NEXT: [[ICMP:%[0-9]+]]:_(s8) = G_ICMP intpred(eq), [[OR]](s32), [[C]] ; X86-NEXT: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[OR1]](s32) ; X86-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; X86-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[CTTZ_ZERO_UNDEF]], [[C2]] - ; X86-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[C]], [[C]], [[UADDO1]] + ; X86-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s8) = G_UADDO [[CTTZ_ZERO_UNDEF]], [[C2]] + ; X86-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s8) = G_UADDE [[C]], [[C]], [[UADDO1]] ; X86-NEXT: [[CTTZ_ZERO_UNDEF1:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[OR]](s32) ; X86-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s8) ; X86-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 @@ -97,8 +97,8 @@ body: | ; X86-NEXT: [[ICMP:%[0-9]+]]:_(s8) = G_ICMP intpred(eq), [[UV]](s32), [[C]] ; X86-NEXT: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[UV1]](s32) ; X86-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; X86-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[CTTZ_ZERO_UNDEF]], [[C1]] - ; X86-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[C]], [[C]], [[UADDO1]] + ; X86-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s8) = G_UADDO [[CTTZ_ZERO_UNDEF]], [[C1]] + ; X86-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s8) = G_UADDE [[C]], [[C]], [[UADDO1]] ; X86-NEXT: [[CTTZ_ZERO_UNDEF1:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[UV]](s32) ; X86-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s8) ; X86-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 diff --git 
a/llvm/test/CodeGen/X86/GlobalISel/legalize-trailing-zeros.mir b/llvm/test/CodeGen/X86/GlobalISel/legalize-trailing-zeros.mir index e2d10423..f5d8477 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/legalize-trailing-zeros.mir +++ b/llvm/test/CodeGen/X86/GlobalISel/legalize-trailing-zeros.mir @@ -32,8 +32,8 @@ body: | ; X86-NEXT: [[ICMP:%[0-9]+]]:_(s8) = G_ICMP intpred(eq), [[OR]](s32), [[C]] ; X86-NEXT: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[OR1]](s32) ; X86-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; X86-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[CTTZ_ZERO_UNDEF]], [[C2]] - ; X86-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[C]], [[C]], [[UADDO1]] + ; X86-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s8) = G_UADDO [[CTTZ_ZERO_UNDEF]], [[C2]] + ; X86-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s8) = G_UADDE [[C]], [[C]], [[UADDO1]] ; X86-NEXT: [[CTTZ_ZERO_UNDEF1:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[OR]](s32) ; X86-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s8) ; X86-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 @@ -99,8 +99,8 @@ body: | ; X86-NEXT: [[ICMP:%[0-9]+]]:_(s8) = G_ICMP intpred(eq), [[UV]](s32), [[C]] ; X86-NEXT: [[CTTZ:%[0-9]+]]:_(s32) = G_CTTZ [[UV1]](s32) ; X86-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; X86-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[CTTZ]], [[C1]] - ; X86-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[C]], [[C]], [[UADDO1]] + ; X86-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s8) = G_UADDO [[CTTZ]], [[C1]] + ; X86-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s8) = G_UADDE [[C]], [[C]], [[UADDO1]] ; X86-NEXT: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[UV]](s32) ; X86-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s8) ; X86-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 diff --git a/llvm/test/CodeGen/X86/GlobalISel/pr49087.ll b/llvm/test/CodeGen/X86/GlobalISel/pr49087.ll new file mode 100644 index 0000000..41d890b --- /dev/null +++ b/llvm/test/CodeGen/X86/GlobalISel/pr49087.ll @@ -0,0 +1,50 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -o - -global-isel -global-isel-abort=1 < %s 2>&1 | FileCheck %s + +define i32 @test_01(ptr %p, i64 %len, i32 %x) { +; CHECK-LABEL: test_01: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movl $1, %eax +; CHECK-NEXT: .p2align 4 +; CHECK-NEXT: .LBB0_1: # %loop +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: subq %rax, %rsi +; CHECK-NEXT: setb %cl +; CHECK-NEXT: testb $1, %cl +; CHECK-NEXT: jne .LBB0_4 +; CHECK-NEXT: # %bb.2: # %backedge +; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT: imulq $4, %rsi, %rcx +; CHECK-NEXT: addq %rdi, %rcx +; CHECK-NEXT: cmpl %edx, (%rcx) +; CHECK-NEXT: sete %cl +; CHECK-NEXT: testb $1, %cl +; CHECK-NEXT: je .LBB0_1 +; CHECK-NEXT: # %bb.3: # %failure +; CHECK-NEXT: .LBB0_4: # %exit +; CHECK-NEXT: movl $-1, %eax +; CHECK-NEXT: retq + +entry: + %scevgep = getelementptr i32, ptr %p, i64 -1 + br label %loop + +loop: ; preds = %backedge, %entry + %iv = phi i64 [ %iv.next, %backedge ], [ %len, %entry ] + %iv.next = add i64 %iv, -1 + %cond_1 = icmp eq i64 %iv, 0 + br i1 %cond_1, label %exit, label %backedge + +backedge: ; preds = %loop + %scevgep1 = getelementptr i32, ptr %scevgep, i64 %iv + %loaded = load atomic i32, ptr %scevgep1 unordered, align 4 + %cond_2 = icmp eq i32 %loaded, %x + br 
i1 %cond_2, label %failure, label %loop + +exit: ; preds = %loop + ret i32 -1 + +failure: + unreachable +} + diff --git a/llvm/test/CodeGen/X86/GlobalISel/regbankselect-X32.mir b/llvm/test/CodeGen/X86/GlobalISel/regbankselect-X32.mir index 8eac3eaf..76680ac 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/regbankselect-X32.mir +++ b/llvm/test/CodeGen/X86/GlobalISel/regbankselect-X32.mir @@ -29,8 +29,8 @@ body: | bb.0 (%ir-block.0): %0(s32) = IMPLICIT_DEF %1(s32) = IMPLICIT_DEF - %2(s1) = IMPLICIT_DEF - %3(s32), %4(s1) = G_UADDE %0, %1, %2 + %2(s8) = IMPLICIT_DEF + %3(s32), %4(s8) = G_UADDE %0, %1, %2 RET 0 ... diff --git a/llvm/test/CodeGen/X86/GlobalISel/select-add-x32.mir b/llvm/test/CodeGen/X86/GlobalISel/select-add-x32.mir index 773813f..b85180f 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/select-add-x32.mir +++ b/llvm/test/CodeGen/X86/GlobalISel/select-add-x32.mir @@ -27,25 +27,24 @@ body: | bb.0 (%ir-block.0): ; X32-LABEL: name: test_add_i64 ; X32: [[DEF:%[0-9]+]]:gr32 = IMPLICIT_DEF - ; X32: [[DEF1:%[0-9]+]]:gr32 = IMPLICIT_DEF - ; X32: [[DEF2:%[0-9]+]]:gr32 = IMPLICIT_DEF - ; X32: [[DEF3:%[0-9]+]]:gr32 = IMPLICIT_DEF - ; X32: [[ADD32rr:%[0-9]+]]:gr32 = ADD32rr [[DEF]], [[DEF2]], implicit-def $eflags - ; X32: [[COPY:%[0-9]+]]:gr32 = COPY $eflags - ; X32: $eflags = COPY [[COPY]] - ; X32: [[ADC32rr:%[0-9]+]]:gr32 = ADC32rr [[DEF1]], [[DEF3]], implicit-def $eflags, implicit $eflags - ; X32: [[COPY1:%[0-9]+]]:gr32 = COPY $eflags - ; X32: $eax = COPY [[ADD32rr]] - ; X32: $edx = COPY [[ADC32rr]] - ; X32: RET 0, implicit $eax, implicit $edx + ; X32-NEXT: [[DEF1:%[0-9]+]]:gr32 = IMPLICIT_DEF + ; X32-NEXT: [[DEF2:%[0-9]+]]:gr32 = IMPLICIT_DEF + ; X32-NEXT: [[DEF3:%[0-9]+]]:gr32 = IMPLICIT_DEF + ; X32-NEXT: [[ADD32rr:%[0-9]+]]:gr32 = ADD32rr [[DEF]], [[DEF2]], implicit-def $eflags + ; X32-NEXT: [[SETCCr:%[0-9]+]]:gr8 = SETCCr 2, implicit $eflags + ; X32-NEXT: CMP8ri [[SETCCr]], 1, implicit-def $eflags + ; X32-NEXT: [[ADC32rr:%[0-9]+]]:gr32 = ADC32rr [[DEF1]], [[DEF3]], implicit-def $eflags, implicit $eflags + ; X32-NEXT: [[SETCCr1:%[0-9]+]]:gr8 = SETCCr 2, implicit $eflags + ; X32-NEXT: $eax = COPY [[ADD32rr]] + ; X32-NEXT: $edx = COPY [[ADC32rr]] + ; X32-NEXT: RET 0, implicit $eax, implicit $edx %0(s32) = IMPLICIT_DEF %1(s32) = IMPLICIT_DEF %2(s32) = IMPLICIT_DEF %3(s32) = IMPLICIT_DEF %9(s8) = G_CONSTANT i8 0 - %4(s1) = G_TRUNC %9(s8) - %5(s32), %6(s1) = G_UADDE %0, %2, %4 - %7(s32), %8(s1) = G_UADDE %1, %3, %6 + %5(s32), %6(s8) = G_UADDE %0, %2, %9 + %7(s32), %8(s8) = G_UADDE %1, %3, %6 $eax = COPY %5(s32) $edx = COPY %7(s32) RET 0, implicit $eax, implicit $edx diff --git a/llvm/test/CodeGen/X86/GlobalISel/select-get-carry-bit.ll b/llvm/test/CodeGen/X86/GlobalISel/select-get-carry-bit.ll new file mode 100644 index 0000000..0cf1372 --- /dev/null +++ b/llvm/test/CodeGen/X86/GlobalISel/select-get-carry-bit.ll @@ -0,0 +1,21 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -global-isel=1 -global-isel-abort=1 | FileCheck %s + +; Issue #120029 +define i16 @use_carry_bit(i16 %2) { +; CHECK-LABEL: use_carry_bit: +; CHECK: # %bb.0: +; CHECK-NEXT: movw $1, %ax +; CHECK-NEXT: xorl %ecx, %ecx +; CHECK-NEXT: addw %di, %ax +; CHECK-NEXT: setb %cl +; CHECK-NEXT: andl $1, %ecx +; CHECK-NEXT: cmovnew %di, %ax +; CHECK-NEXT: retq + %uadd = call { i16, i1 } @llvm.uadd.with.overflow.i16(i16 %2, i16 1) + %res = extractvalue { i16, i1 } %uadd, 0 + %carry = extractvalue { i16, i1 } %uadd, 1 + %ret = select i1 
%carry, i16 %2, i16 %res + ret i16 %ret +} + diff --git a/llvm/test/CodeGen/X86/GlobalISel/sub-scalar.ll b/llvm/test/CodeGen/X86/GlobalISel/sub-scalar.ll index 7a035f5..be75d7c 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/sub-scalar.ll +++ b/llvm/test/CodeGen/X86/GlobalISel/sub-scalar.ll @@ -7,12 +7,15 @@ define i128 @test_sub_i128(i128 %arg1, i128 %arg2) nounwind { ; X64: # %bb.0: ; X64-NEXT: movq %rdi, %rax ; X64-NEXT: subq %rdx, %rax +; X64-NEXT: setb %dl +; X64-NEXT: cmpb $1, %dl ; X64-NEXT: sbbq %rcx, %rsi ; X64-NEXT: movq %rsi, %rdx ; X64-NEXT: retq ; ; X86-LABEL: test_sub_i128: ; X86: # %bb.0: +; X86-NEXT: pushl %ebx ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax @@ -21,8 +24,14 @@ define i128 @test_sub_i128(i128 %arg1, i128 %arg2) nounwind { ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi ; X86-NEXT: subl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: setb %bl +; X86-NEXT: cmpb $1, %bl ; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edx +; X86-NEXT: setb %bl +; X86-NEXT: cmpb $1, %bl ; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi +; X86-NEXT: setb %bl +; X86-NEXT: cmpb $1, %bl ; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edi ; X86-NEXT: movl %ecx, (%eax) ; X86-NEXT: movl %edx, 4(%eax) @@ -30,6 +39,7 @@ define i128 @test_sub_i128(i128 %arg1, i128 %arg2) nounwind { ; X86-NEXT: movl %edi, 12(%eax) ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx ; X86-NEXT: retl %ret = sub i128 %arg1, %arg2 ret i128 %ret @@ -47,6 +57,8 @@ define i64 @test_sub_i64(i64 %arg1, i64 %arg2) { ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: subl {{[0-9]+}}(%esp), %eax +; X86-NEXT: setb %cl +; X86-NEXT: cmpb $1, %cl ; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edx ; X86-NEXT: retl %ret = sub i64 %arg1, %arg2 diff --git a/llvm/test/CodeGen/X86/absolute-symbol-kernel-code-model.ll b/llvm/test/CodeGen/X86/absolute-symbol-kernel-code-model.ll new file mode 100644 index 0000000..ce7024d --- /dev/null +++ b/llvm/test/CodeGen/X86/absolute-symbol-kernel-code-model.ll @@ -0,0 +1,34 @@ +; RUN: llc --code-model=kernel < %s -asm-verbose=0 | FileCheck %s + +target triple = "x86_64-unknown-linux-gnu" + +; CHECK-LABEL: func_no_abs_sym +define i64 @func_no_abs_sym() nounwind { + ; CHECK: movq $no_abs_sym, %rax + %1 = ptrtoint ptr @no_abs_sym to i64 + ret i64 %1 +} + +; CHECK-LABEL: func_abs_sym +define i64 @func_abs_sym() nounwind { + ; CHECK: movabsq $abs_sym, %rax + %1 = ptrtoint ptr @abs_sym to i64 + ret i64 %1 +} + +; CHECK-LABEL: func_abs_sym_in_range +define i64 @func_abs_sym_in_range() nounwind { + ;; The absolute_symbol range fits in 32 bits but we still use movabs + ;; since there's no benefit to using the sign extending instruction + ;; with absolute symbols. 
+ ; CHECK: movabsq $abs_sym_in_range, %rax + %1 = ptrtoint ptr @abs_sym_in_range to i64 + ret i64 %1 +} + +@no_abs_sym = external hidden global [0 x i8] +@abs_sym = external hidden global [0 x i8], !absolute_symbol !0 +@abs_sym_in_range = external hidden global [0 x i8], !absolute_symbol !1 + +!0 = !{i64 -1, i64 -1} ;; Full range +!1 = !{i64 -2147483648, i64 2147483648} ;; In range diff --git a/llvm/test/CodeGen/X86/apx/cf.ll b/llvm/test/CodeGen/X86/apx/cf.ll index b2651e9..de9caa5 100644 --- a/llvm/test/CodeGen/X86/apx/cf.ll +++ b/llvm/test/CodeGen/X86/apx/cf.ll @@ -230,6 +230,24 @@ entry: ret void } +define void @and_cond(i32 %a, i1 %b) { +; CHECK-LABEL: and_cond: +; CHECK: # %bb.0: +; CHECK-NEXT: testl %edi, %edi +; CHECK-NEXT: setg %al +; CHECK-NEXT: notb %sil +; CHECK-NEXT: xorl %ecx, %ecx +; CHECK-NEXT: testb %al, %sil +; CHECK-NEXT: cfcmovnel %ecx, 0 +; CHECK-NEXT: retq + %is_pos = icmp sgt i32 %a, 0 + %not_b = xor i1 %b, true + %cond = and i1 %not_b, %is_pos + %mask = insertelement <1 x i1> zeroinitializer, i1 %cond, i64 0 + call void @llvm.masked.store.v1i32.p0(<1 x i32> zeroinitializer, ptr null, i32 1, <1 x i1> %mask) + ret void +} + define i64 @redundant_test(i64 %num, ptr %p1, i64 %in) { ; CHECK-LABEL: redundant_test: ; CHECK: # %bb.0: diff --git a/llvm/test/CodeGen/X86/avg.ll b/llvm/test/CodeGen/X86/avg.ll index 0de308a..5152c005 100644 --- a/llvm/test/CodeGen/X86/avg.ll +++ b/llvm/test/CodeGen/X86/avg.ll @@ -728,45 +728,70 @@ define void @avg_v32i8_2(ptr %a, ptr %b) nounwind { define void @avg_v64i8_2(ptr %a, ptr %b) nounwind { ; SSE2-LABEL: avg_v64i8_2: ; SSE2: # %bb.0: -; SSE2-NEXT: movaps (%rsi), %xmm0 -; SSE2-NEXT: movaps 16(%rsi), %xmm1 -; SSE2-NEXT: movaps 32(%rsi), %xmm2 -; SSE2-NEXT: movaps 48(%rsi), %xmm3 -; SSE2-NEXT: movups %xmm3, (%rax) -; SSE2-NEXT: movups %xmm2, (%rax) -; SSE2-NEXT: movups %xmm1, (%rax) -; SSE2-NEXT: movups %xmm0, (%rax) +; SSE2-NEXT: movdqa (%rdi), %xmm0 +; SSE2-NEXT: movdqa 16(%rdi), %xmm1 +; SSE2-NEXT: movdqa 32(%rdi), %xmm2 +; SSE2-NEXT: movdqa 48(%rdi), %xmm3 +; SSE2-NEXT: pavgb (%rsi), %xmm0 +; SSE2-NEXT: pavgb 16(%rsi), %xmm1 +; SSE2-NEXT: pavgb 32(%rsi), %xmm2 +; SSE2-NEXT: pavgb 48(%rsi), %xmm3 +; SSE2-NEXT: movdqu %xmm3, (%rax) +; SSE2-NEXT: movdqu %xmm2, (%rax) +; SSE2-NEXT: movdqu %xmm1, (%rax) +; SSE2-NEXT: movdqu %xmm0, (%rax) ; SSE2-NEXT: retq ; ; AVX1-LABEL: avg_v64i8_2: ; AVX1: # %bb.0: -; AVX1-NEXT: vmovaps (%rsi), %ymm0 -; AVX1-NEXT: vmovaps 32(%rsi), %ymm1 -; AVX1-NEXT: vmovups %ymm1, (%rax) -; AVX1-NEXT: vmovups %ymm0, (%rax) -; AVX1-NEXT: vzeroupper +; AVX1-NEXT: vmovdqa (%rdi), %xmm0 +; AVX1-NEXT: vmovdqa 16(%rdi), %xmm1 +; AVX1-NEXT: vmovdqa 32(%rdi), %xmm2 +; AVX1-NEXT: vmovdqa 48(%rdi), %xmm3 +; AVX1-NEXT: vpavgb (%rsi), %xmm0, %xmm0 +; AVX1-NEXT: vpavgb 16(%rsi), %xmm1, %xmm1 +; AVX1-NEXT: vpavgb 32(%rsi), %xmm2, %xmm2 +; AVX1-NEXT: vpavgb 48(%rsi), %xmm3, %xmm3 +; AVX1-NEXT: vmovdqu %xmm3, (%rax) +; AVX1-NEXT: vmovdqu %xmm2, (%rax) +; AVX1-NEXT: vmovdqu %xmm1, (%rax) +; AVX1-NEXT: vmovdqu %xmm0, (%rax) ; AVX1-NEXT: retq ; ; AVX2-LABEL: avg_v64i8_2: ; AVX2: # %bb.0: -; AVX2-NEXT: vmovaps (%rsi), %ymm0 -; AVX2-NEXT: vmovaps 32(%rsi), %ymm1 -; AVX2-NEXT: vmovups %ymm1, (%rax) -; AVX2-NEXT: vmovups %ymm0, (%rax) +; AVX2-NEXT: vmovdqa (%rdi), %ymm0 +; AVX2-NEXT: vmovdqa 32(%rdi), %ymm1 +; AVX2-NEXT: vpavgb (%rsi), %ymm0, %ymm0 +; AVX2-NEXT: vpavgb 32(%rsi), %ymm1, %ymm1 +; AVX2-NEXT: vmovdqu %ymm1, (%rax) +; AVX2-NEXT: vmovdqu %ymm0, (%rax) ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; -; AVX512-LABEL: avg_v64i8_2: -; 
AVX512: # %bb.0: -; AVX512-NEXT: vmovaps (%rsi), %zmm0 -; AVX512-NEXT: vmovups %zmm0, (%rax) -; AVX512-NEXT: vzeroupper -; AVX512-NEXT: retq +; AVX512F-LABEL: avg_v64i8_2: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vmovdqa (%rdi), %ymm0 +; AVX512F-NEXT: vmovdqa 32(%rdi), %ymm1 +; AVX512F-NEXT: vpavgb (%rsi), %ymm0, %ymm0 +; AVX512F-NEXT: vpavgb 32(%rsi), %ymm1, %ymm1 +; AVX512F-NEXT: vmovdqu %ymm1, (%rax) +; AVX512F-NEXT: vmovdqu %ymm0, (%rax) +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; +; AVX512BW-LABEL: avg_v64i8_2: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: vmovdqa64 (%rdi), %zmm0 +; AVX512BW-NEXT: vpavgb (%rsi), %zmm0, %zmm0 +; AVX512BW-NEXT: vmovdqu64 %zmm0, (%rax) +; AVX512BW-NEXT: vzeroupper +; AVX512BW-NEXT: retq %1 = load <64 x i8>, ptr %a %2 = load <64 x i8>, ptr %b %3 = zext <64 x i8> %1 to <64 x i32> %4 = zext <64 x i8> %2 to <64 x i32> - %5 = add nuw nsw <64 x i32> %4, %4 + %5 = add nuw nsw <64 x i32> %3, %4 %6 = add nuw nsw <64 x i32> %5, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> %7 = lshr <64 x i32> %6, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> %8 = trunc <64 x i32> %7 to <64 x i8> @@ -774,7 +799,6 @@ define void @avg_v64i8_2(ptr %a, ptr %b) nounwind { ret void } - define void @avg_v4i16_2(ptr %a, ptr %b) nounwind { ; SSE2-LABEL: avg_v4i16_2: ; SSE2: # %bb.0: diff --git a/llvm/test/CodeGen/X86/avx-shift.ll b/llvm/test/CodeGen/X86/avx-shift.ll index c9c09d7..3bce843 100644 --- a/llvm/test/CodeGen/X86/avx-shift.ll +++ b/llvm/test/CodeGen/X86/avx-shift.ll @@ -201,7 +201,7 @@ define <8 x i32> @vshift08_add(<8 x i32> %a, <8 x i32> %y) { define <4 x i32> @vshift13(<4 x i32> %in) { ; CHECK-LABEL: vshift13: ; CHECK: # %bb.0: -; CHECK-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; CHECK-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [1,2,4,16] ; CHECK-NEXT: retq %T = shl <4 x i32> %in, <i32 0, i32 1, i32 2, i32 4> ret <4 x i32> %T diff --git a/llvm/test/CodeGen/X86/avx2-arith.ll b/llvm/test/CodeGen/X86/avx2-arith.ll index 70b3b99..1133cdfd 100644 --- a/llvm/test/CodeGen/X86/avx2-arith.ll +++ b/llvm/test/CodeGen/X86/avx2-arith.ll @@ -199,12 +199,12 @@ define <8 x i32> @mul_const5(<8 x i32> %x) { define <8 x i32> @mul_const6(<8 x i32> %x) { ; X86-LABEL: mul_const6: ; X86: # %bb.0: -; X86-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0 +; X86-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0 # [0,0,0,2,0,2,0,0] ; X86-NEXT: retl ; ; X64-LABEL: mul_const6: ; X64: # %bb.0: -; X64-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; X64-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [0,0,0,2,0,2,0,0] ; X64-NEXT: retq %y = mul <8 x i32> %x, <i32 0, i32 0, i32 0, i32 2, i32 0, i32 2, i32 0, i32 0> ret <8 x i32> %y diff --git 
a/llvm/test/CodeGen/X86/call-graph-section-addrtaken.ll b/llvm/test/CodeGen/X86/call-graph-section-addrtaken.ll index 2aea9c1..632d90d 100644 --- a/llvm/test/CodeGen/X86/call-graph-section-addrtaken.ll +++ b/llvm/test/CodeGen/X86/call-graph-section-addrtaken.ll @@ -27,7 +27,7 @@ entry: !1 = !{i64 0, !"_ZTSFivE.generalized"} !2 = !{i64 0, !"_ZTSFviE.generalized"} -; CHECK: .section .callgraph,"o",@progbits,.text +; CHECK: .section .llvm.callgraph,"o",@progbits,.text ;; Version ; CHECK-NEXT: .byte 0 ;; Flags -- Potential indirect target so LSB is set to 1. Other bits are 0. diff --git a/llvm/test/CodeGen/X86/call-graph-section-assembly.ll b/llvm/test/CodeGen/X86/call-graph-section-assembly.ll index 1aabf66..ed6849a 100644 --- a/llvm/test/CodeGen/X86/call-graph-section-assembly.ll +++ b/llvm/test/CodeGen/X86/call-graph-section-assembly.ll @@ -1,8 +1,8 @@ ;; Test if temporary labels are generated for each indirect callsite. -;; Test if the .callgraph section contains the MD5 hash of callees' type (type id) +;; Test if the .llvm.callgraph section contains the MD5 hash of callees' type (type id) ;; is correctly paired with its corresponding temporary label generated for indirect ;; call sites annotated with !callee_type metadata. -;; Test if the .callgraph section contains unique direct callees. +;; Test if the .llvm.callgraph section contains unique direct callees. ; RUN: llc -mtriple=x86_64-unknown-linux --call-graph-section -o - < %s | FileCheck %s @@ -36,7 +36,7 @@ entry: !4 = !{!5} !5 = !{i64 0, !"_ZTSFPvS_E.generalized"} -; CHECK: .section .callgraph,"o",@progbits,.text +; CHECK: .section .llvm.callgraph,"o",@progbits,.text ;; Version ; CHECK-NEXT: .byte 0 ;; Flags diff --git a/llvm/test/CodeGen/X86/call-graph-section-tailcall.ll b/llvm/test/CodeGen/X86/call-graph-section-tailcall.ll index 34dc5b8..49cc335 100644 --- a/llvm/test/CodeGen/X86/call-graph-section-tailcall.ll +++ b/llvm/test/CodeGen/X86/call-graph-section-tailcall.ll @@ -1,7 +1,10 @@ -;; Tests that we store the type identifiers in .callgraph section of the object file for tailcalls. +;; Tests that we store the type identifiers in .llvm.callgraph section of the object file for tailcalls. + +; REQUIRES: x86-registered-target +; REQUIRES: arm-registered-target ; RUN: llc -mtriple=x86_64-unknown-linux --call-graph-section -filetype=obj -o - < %s | \ -; RUN: llvm-readelf -x .callgraph - | FileCheck %s +; RUN: llvm-readelf -x .llvm.callgraph - | FileCheck %s define i32 @check_tailcall(ptr %func, i8 %x) !type !0 { entry: @@ -27,7 +30,7 @@ declare !type !2 i32 @bar(i8 signext) !2 = !{i64 0, !"_ZTSFicE.generalized"} !3 = !{i64 0, !"_ZTSFiiE.generalized"} -; CHECK: Hex dump of section '.callgraph': +; CHECK: Hex dump of section '.llvm.callgraph': ; CHECK-NEXT: 0x00000000 00050000 00000000 00008e19 0b7f3326 ; CHECK-NEXT: 0x00000010 e3000154 86bc5981 4b8e3000 05000000 ;; Verify that the type id 0x308e4b8159bc8654 is in section. diff --git a/llvm/test/CodeGen/X86/call-graph-section.ll b/llvm/test/CodeGen/X86/call-graph-section.ll index c144a24..8a1c6ca 100644 --- a/llvm/test/CodeGen/X86/call-graph-section.ll +++ b/llvm/test/CodeGen/X86/call-graph-section.ll @@ -1,7 +1,10 @@ -;; Tests that we store the type identifiers in .callgraph section of the object file. +;; Tests that we store the type identifiers in .llvm.callgraph section of the object file. 
+ +; REQUIRES: x86-registered-target +; REQUIRES: arm-registered-target ; RUN: llc -mtriple=x86_64-unknown-linux --call-graph-section -filetype=obj -o - < %s | \ -; RUN: llvm-readelf -x .callgraph - | FileCheck %s +; RUN: llvm-readelf -x .llvm.callgraph - | FileCheck %s declare !type !0 void @foo() @@ -31,7 +34,7 @@ entry: ;; Make sure following type IDs are in call graph section ;; 0x5eecb3e2444f731f, 0x814b8e305486bc59, 0xf897fd777ade6814 -; CHECK: Hex dump of section '.callgraph': +; CHECK: Hex dump of section '.llvm.callgraph': ; CHECK-NEXT: 0x00000000 00050000 00000000 00000000 00000000 ; CHECK-NEXT: 0x00000010 00000324 44f731f5 eecb3e54 86bc5981 ; CHECK-NEXT: 0x00000020 4b8e307a de6814f8 97fd77 diff --git a/llvm/test/CodeGen/X86/combine-mul.ll b/llvm/test/CodeGen/X86/combine-mul.ll index ae4d24f..29c41ca 100644 --- a/llvm/test/CodeGen/X86/combine-mul.ll +++ b/llvm/test/CodeGen/X86/combine-mul.ll @@ -66,7 +66,7 @@ define <4 x i32> @combine_vec_mul_pow2a(<4 x i32> %x) { define <4 x i32> @combine_vec_mul_pow2b(<4 x i32> %x) { ; SSE-LABEL: combine_vec_mul_pow2b: ; SSE: # %bb.0: -; SSE-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [1,2,4,16] ; SSE-NEXT: retq ; ; AVX-LABEL: combine_vec_mul_pow2b: @@ -120,12 +120,12 @@ define <4 x i32> @combine_vec_mul_negpow2a(<4 x i32> %x) { define <4 x i32> @combine_vec_mul_negpow2b(<4 x i32> %x) { ; SSE-LABEL: combine_vec_mul_negpow2b: ; SSE: # %bb.0: -; SSE-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [4294967295,4294967294,4294967292,4294967280] ; SSE-NEXT: retq ; ; AVX-LABEL: combine_vec_mul_negpow2b: ; AVX: # %bb.0: -; AVX-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [4294967295,4294967294,4294967292,4294967280] ; AVX-NEXT: retq %1 = mul <4 x i32> %x, <i32 -1, i32 -2, i32 -4, i32 -16> ret <4 x i32> %1 @@ -176,12 +176,12 @@ define <4 x i64> @combine_vec_mul_negpow2c(<4 x i64> %x) { define <4 x i32> @combine_vec_mul_shl_const(<4 x i32> %x) { ; SSE-LABEL: combine_vec_mul_shl_const: ; SSE: # %bb.0: -; SSE-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [2,12,1280,458752] ; SSE-NEXT: retq ; ; AVX-LABEL: combine_vec_mul_shl_const: ; AVX: # %bb.0: -; AVX-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [2,12,1280,458752] ; AVX-NEXT: retq %1 = shl <4 x i32> %x, <i32 1, i32 2, i32 8, i32 16> %2 = mul <4 x i32> %1, <i32 1, i32 3, i32 5, i32 7> @@ -193,7 +193,7 @@ define <4 x i32> @combine_vec_mul_shl_oneuse0(<4 x i32> %x, <4 x i32> %y) { ; SSE-LABEL: combine_vec_mul_shl_oneuse0: ; SSE: # %bb.0: ; SSE-NEXT: pmulld %xmm1, %xmm0 -; SSE-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [2,4,256,65536] ; SSE-NEXT: retq ; ; AVX-LABEL: combine_vec_mul_shl_oneuse0: @@ -210,7 +210,7 @@ define <4 x i32> @combine_vec_mul_shl_oneuse1(<4 x i32> %x, <4 x i32> %y) { ; SSE-LABEL: combine_vec_mul_shl_oneuse1: ; SSE: # %bb.0: ; SSE-NEXT: pmulld %xmm1, %xmm0 -; SSE-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [2,4,256,65536] ; SSE-NEXT: retq ; ; AVX-LABEL: combine_vec_mul_shl_oneuse1: @@ -226,7 +226,7 @@ define <4 x i32> @combine_vec_mul_shl_oneuse1(<4 x i32> %x, <4 x i32> %y) { define <4 x i32> 
@combine_vec_mul_shl_multiuse0(<4 x i32> %x, <4 x i32> %y) { ; SSE-LABEL: combine_vec_mul_shl_multiuse0: ; SSE: # %bb.0: -; SSE-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [2,4,256,65536] ; SSE-NEXT: pmulld %xmm0, %xmm1 ; SSE-NEXT: paddd %xmm1, %xmm0 ; SSE-NEXT: retq @@ -246,7 +246,7 @@ define <4 x i32> @combine_vec_mul_shl_multiuse0(<4 x i32> %x, <4 x i32> %y) { define <4 x i32> @combine_vec_mul_shl_multiuse1(<4 x i32> %x, <4 x i32> %y) { ; SSE-LABEL: combine_vec_mul_shl_multiuse1: ; SSE: # %bb.0: -; SSE-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [2,4,256,65536] ; SSE-NEXT: pmulld %xmm0, %xmm1 ; SSE-NEXT: paddd %xmm1, %xmm0 ; SSE-NEXT: retq @@ -268,13 +268,13 @@ define <4 x i32> @combine_vec_mul_shl_multiuse1(<4 x i32> %x, <4 x i32> %y) { define <4 x i32> @combine_vec_mul_add(<4 x i32> %x) { ; SSE-LABEL: combine_vec_mul_add: ; SSE: # %bb.0: -; SSE-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [4,6,2,0] ; SSE-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: combine_vec_mul_add: ; AVX: # %bb.0: -; AVX-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [4,6,2,0] ; AVX-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; AVX-NEXT: retq %1 = add <4 x i32> %x, <i32 1, i32 2, i32 8, i32 16> diff --git a/llvm/test/CodeGen/X86/combine-multiplies.ll b/llvm/test/CodeGen/X86/combine-multiplies.ll index a5d9846..4bdf20d 100644 --- a/llvm/test/CodeGen/X86/combine-multiplies.ll +++ b/llvm/test/CodeGen/X86/combine-multiplies.ll @@ -142,9 +142,9 @@ define void @testCombineMultiplies_non_splat(<4 x i32> %v1) nounwind { ; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [11,22,33,44] ; CHECK-NEXT: paddd %xmm0, %xmm1 ; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3] -; CHECK-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 +; CHECK-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [22,33,44,55] ; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; CHECK-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm2 +; CHECK-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm2 # [33,u,55,u] ; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3] ; CHECK-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] ; CHECK-NEXT: movdqa {{.*#+}} xmm2 = [242,726,1452,2420] diff --git a/llvm/test/CodeGen/X86/combine-pmuldq.ll b/llvm/test/CodeGen/X86/combine-pmuldq.ll index 70335f8..ff5329c 100644 --- a/llvm/test/CodeGen/X86/combine-pmuldq.ll +++ b/llvm/test/CodeGen/X86/combine-pmuldq.ll @@ -204,16 +204,16 @@ define i32 @PR43159(ptr %a0) { ; SSE: # %bb.0: # %entry ; SSE-NEXT: movdqa (%rdi), %xmm0 ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] -; SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [344322273,344322273,1916962805,1916962805] ; SSE-NEXT: movdqa %xmm0, %xmm2 ; SSE-NEXT: psrld $1, %xmm2 ; SSE-NEXT: pblendw {{.*#+}} xmm2 = xmm0[0,1,2,3],xmm2[4,5],xmm0[6,7] ; SSE-NEXT: psubd %xmm1, %xmm0 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] -; SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [2147483648,2147483648,2147483648,2147483648] ; SSE-NEXT: paddd %xmm1, %xmm0 ; SSE-NEXT: psrld $7, %xmm0 -; SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 +; SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 
# [1645975491,344322273,2164392969,1916962805] ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3] ; SSE-NEXT: psrld $6, %xmm1 ; SSE-NEXT: movd %xmm1, %edi @@ -226,15 +226,15 @@ define i32 @PR43159(ptr %a0) { ; AVX1: # %bb.0: # %entry ; AVX1-NEXT: vmovdqa (%rdi), %xmm0 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] -; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [344322273,344322273,1916962805,1916962805] ; AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm2 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] -; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 +; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 # [2147483648,2147483648,2147483648,2147483648] ; AVX1-NEXT: vpaddd %xmm1, %xmm2, %xmm1 ; AVX1-NEXT: vpsrld $7, %xmm1, %xmm1 ; AVX1-NEXT: vpsrld $1, %xmm0, %xmm2 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5],xmm0[6,7] -; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [1645975491,344322273,2164392969,1916962805] ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] ; AVX1-NEXT: vpsrld $6, %xmm0, %xmm0 ; AVX1-NEXT: vmovd %xmm0, %edi @@ -247,9 +247,9 @@ define i32 @PR43159(ptr %a0) { ; AVX2: # %bb.0: # %entry ; AVX2-NEXT: vmovdqa (%rdi), %xmm0 ; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] -; AVX2-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX2-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [344322273,u,1916962805,u] ; AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2 -; AVX2-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 +; AVX2-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 # [1645975491,344322273,2164392969,1916962805] ; AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] ; AVX2-NEXT: vpblendd {{.*#+}} xmm2 = xmm2[0],xmm1[1],xmm2[2],xmm1[3] ; AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0 @@ -270,9 +270,9 @@ define i32 @PR43159(ptr %a0) { ; AVX512VL: # %bb.0: # %entry ; AVX512VL-NEXT: vmovdqa (%rdi), %xmm0 ; AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] -; AVX512VL-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX512VL-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [344322273,u,1916962805,u] ; AVX512VL-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2 -; AVX512VL-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 +; AVX512VL-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 # [1645975491,344322273,2164392969,1916962805] ; AVX512VL-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] ; AVX512VL-NEXT: vpblendd {{.*#+}} xmm2 = xmm2[0],xmm1[1],xmm2[2],xmm1[3] ; AVX512VL-NEXT: vpsubd %xmm1, %xmm0, %xmm0 @@ -293,9 +293,9 @@ define i32 @PR43159(ptr %a0) { ; AVX512DQVL: # %bb.0: # %entry ; AVX512DQVL-NEXT: vmovdqa (%rdi), %xmm0 ; AVX512DQVL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] -; AVX512DQVL-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX512DQVL-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [344322273,u,1916962805,u] ; AVX512DQVL-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2 -; AVX512DQVL-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 +; AVX512DQVL-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 # [1645975491,344322273,2164392969,1916962805] ; AVX512DQVL-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] ; AVX512DQVL-NEXT: vpblendd {{.*#+}} xmm2 = xmm2[0],xmm1[1],xmm2[2],xmm1[3] ; 
AVX512DQVL-NEXT: vpsubd %xmm1, %xmm0, %xmm0 diff --git a/llvm/test/CodeGen/X86/combine-rotates.ll b/llvm/test/CodeGen/X86/combine-rotates.ll index 65d74c8..e7152ec 100644 --- a/llvm/test/CodeGen/X86/combine-rotates.ll +++ b/llvm/test/CodeGen/X86/combine-rotates.ll @@ -10,9 +10,9 @@ define <4 x i32> @combine_vec_rot_rot(<4 x i32> %x) { ; SSE2-LABEL: combine_vec_rot_rot: ; SSE2: # %bb.0: ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] -; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [524288,131072,32768,8192] ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,3,2,3] -; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [131072,u,8192,u] ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,3,2,3] ; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] diff --git a/llvm/test/CodeGen/X86/combine-sdiv.ll b/llvm/test/CodeGen/X86/combine-sdiv.ll index 6bcbfe1..f7baee9 100644 --- a/llvm/test/CodeGen/X86/combine-sdiv.ll +++ b/llvm/test/CodeGen/X86/combine-sdiv.ll @@ -2927,7 +2927,7 @@ define <16 x i8> @pr38658(<16 x i8> %x) { ; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7] ; SSE2-NEXT: pxor %xmm3, %xmm3 ; SSE2-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm0[8],xmm3[9],xmm0[9],xmm3[10],xmm0[10],xmm3[11],xmm0[11],xmm3[12],xmm0[12],xmm3[13],xmm0[13],xmm3[14],xmm0[14],xmm3[15],xmm0[15] -; SSE2-NEXT: pmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 # [0,0,0,0,0,0,0,37632] +; SSE2-NEXT: pmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 # [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,147] ; SSE2-NEXT: psrlw $8, %xmm3 ; SSE2-NEXT: packuswb %xmm3, %xmm1 ; SSE2-NEXT: paddb %xmm1, %xmm0 @@ -2947,7 +2947,7 @@ define <16 x i8> @pr38658(<16 x i8> %x) { ; SSE41-NEXT: pxor %xmm1, %xmm1 ; SSE41-NEXT: pxor %xmm2, %xmm2 ; SSE41-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm0[8],xmm2[9],xmm0[9],xmm2[10],xmm0[10],xmm2[11],xmm0[11],xmm2[12],xmm0[12],xmm2[13],xmm0[13],xmm2[14],xmm0[14],xmm2[15],xmm0[15] -; SSE41-NEXT: pmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [0,0,0,0,0,0,0,37632] +; SSE41-NEXT: pmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,147] ; SSE41-NEXT: psrlw $8, %xmm2 ; SSE41-NEXT: packuswb %xmm2, %xmm1 ; SSE41-NEXT: paddb %xmm0, %xmm1 @@ -2971,7 +2971,7 @@ define <16 x i8> @pr38658(<16 x i8> %x) { ; AVX1: # %bb.0: ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15] -; AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 # [0,0,0,0,0,0,0,37632] +; AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 # [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,147] ; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2 ; AVX1-NEXT: vpackuswb %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm1 @@ -3044,7 +3044,7 @@ define <16 x i8> @pr38658(<16 x i8> %x) { ; XOP: # %bb.0: ; XOP-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; XOP-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15] -; XOP-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 # [0,0,0,0,0,0,0,37632] +; XOP-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 # 
[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,147] ; XOP-NEXT: vpperm {{.*#+}} xmm1 = xmm1[1,3,5,7,9,11,13,15],xmm2[1,3,5,7,9,11,13,15] ; XOP-NEXT: vpaddb %xmm0, %xmm1, %xmm0 ; XOP-NEXT: vpshab {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 diff --git a/llvm/test/CodeGen/X86/combine-shl.ll b/llvm/test/CodeGen/X86/combine-shl.ll index 1ce10c37..9548967 100644 --- a/llvm/test/CodeGen/X86/combine-shl.ll +++ b/llvm/test/CodeGen/X86/combine-shl.ll @@ -88,7 +88,7 @@ define <4 x i32> @combine_vec_shl_known_zero1(<4 x i32> %x) { ; SSE2-NEXT: pmuludq %xmm0, %xmm1 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] -; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [32768,u,8192,u] ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] ; SSE2-NEXT: movdqa %xmm1, %xmm0 @@ -97,7 +97,7 @@ define <4 x i32> @combine_vec_shl_known_zero1(<4 x i32> %x) { ; SSE41-LABEL: combine_vec_shl_known_zero1: ; SSE41: # %bb.0: ; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [65536,32768,16384,8192] ; SSE41-NEXT: retq ; ; AVX-LABEL: combine_vec_shl_known_zero1: @@ -198,16 +198,16 @@ define <4 x i32> @combine_vec_shl_shl1(<4 x i32> %x) { ; SSE2-LABEL: combine_vec_shl_shl1: ; SSE2: # %bb.0: ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] -; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [16,64,256,1024] ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [64,u,1024,u] ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; SSE2-NEXT: retq ; ; SSE41-LABEL: combine_vec_shl_shl1: ; SSE41: # %bb.0: -; SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [16,64,256,1024] ; SSE41-NEXT: retq ; ; AVX-LABEL: combine_vec_shl_shl1: @@ -304,17 +304,17 @@ define <8 x i32> @combine_vec_shl_ext_shl2(<8 x i16> %x) { ; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] ; SSE2-NEXT: psrad $16, %xmm1 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,1,3,3] -; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [131072,524288,2097152,8388608] ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[0,2,2,3] -; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 +; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 # [524288,u,8388608,u] ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[0,2,2,3] ; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] ; SSE2-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7] ; SSE2-NEXT: psrad $16, %xmm0 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] -; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [33554432,134217728,536870912,2147483648] ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,2,2,3] -; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 +; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 # [134217728,u,2147483648,u] ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,2,2,3] ; SSE2-NEXT: punpckldq 
{{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] ; SSE2-NEXT: movdqa %xmm2, %xmm0 @@ -323,10 +323,10 @@ define <8 x i32> @combine_vec_shl_ext_shl2(<8 x i16> %x) { ; SSE41-LABEL: combine_vec_shl_ext_shl2: ; SSE41: # %bb.0: ; SSE41-NEXT: pmovsxwd %xmm0, %xmm2 -; SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 +; SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [131072,524288,2097152,8388608] ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] ; SSE41-NEXT: pmovsxwd %xmm0, %xmm1 -; SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [33554432,134217728,536870912,2147483648] ; SSE41-NEXT: movdqa %xmm2, %xmm0 ; SSE41-NEXT: retq ; @@ -673,9 +673,9 @@ define <4 x i32> @combine_vec_shl_add1(<4 x i32> %x) { ; SSE2-LABEL: combine_vec_shl_add1: ; SSE2: # %bb.0: ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] -; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [2,4,8,16] ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [4,u,16,u] ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 @@ -683,7 +683,7 @@ define <4 x i32> @combine_vec_shl_add1(<4 x i32> %x) { ; ; SSE41-LABEL: combine_vec_shl_add1: ; SSE41: # %bb.0: -; SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [2,4,8,16] ; SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; SSE41-NEXT: retq ; @@ -726,9 +726,9 @@ define <4 x i32> @combine_vec_shl_or1(<4 x i32> %x) { ; SSE2-LABEL: combine_vec_shl_or1: ; SSE2: # %bb.0: ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] -; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [2,4,8,16] ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [4,u,16,u] ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 @@ -736,7 +736,7 @@ define <4 x i32> @combine_vec_shl_or1(<4 x i32> %x) { ; ; SSE41-LABEL: combine_vec_shl_or1: ; SSE41: # %bb.0: -; SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [2,4,8,16] ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; SSE41-NEXT: retq ; @@ -765,7 +765,7 @@ define <4 x i32> @combine_vec_shl_mul0(<4 x i32> %x) { ; ; SSE41-LABEL: combine_vec_shl_mul0: ; SSE41: # %bb.0: -; SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [20,20,20,20] ; SSE41-NEXT: retq ; ; AVX2-LABEL: combine_vec_shl_mul0: @@ -787,21 +787,21 @@ define <4 x i32> @combine_vec_shl_mul1(<4 x i32> %x) { ; SSE2-LABEL: combine_vec_shl_mul1: ; SSE2: # %bb.0: ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] -; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [10,24,56,128] ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # 
[24,u,128,u] ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; SSE2-NEXT: retq ; ; SSE41-LABEL: combine_vec_shl_mul1: ; SSE41: # %bb.0: -; SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [10,24,56,128] ; SSE41-NEXT: retq ; ; AVX-LABEL: combine_vec_shl_mul1: ; AVX: # %bb.0: -; AVX-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [10,24,56,128] ; AVX-NEXT: retq %1 = mul <4 x i32> %x, <i32 5, i32 6, i32 7, i32 8> %2 = shl <4 x i32> %1, <i32 1, i32 2, i32 3, i32 4> @@ -813,9 +813,9 @@ define <4 x i32> @combine_vec_add_shl_nonsplat(<4 x i32> %a0) { ; SSE2-LABEL: combine_vec_add_shl_nonsplat: ; SSE2: # %bb.0: ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] -; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [4,8,16,32] ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [8,u,32,u] ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 @@ -823,7 +823,7 @@ define <4 x i32> @combine_vec_add_shl_nonsplat(<4 x i32> %a0) { ; ; SSE41-LABEL: combine_vec_add_shl_nonsplat: ; SSE41: # %bb.0: -; SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [4,8,16,32] ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; SSE41-NEXT: retq ; @@ -852,7 +852,7 @@ define <4 x i32> @combine_vec_add_shl_and_nonsplat(<4 x i32> %a0) { ; SSE2-NEXT: pmuludq %xmm0, %xmm1 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] -; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [8,u,32,u] ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 @@ -863,7 +863,7 @@ define <4 x i32> @combine_vec_add_shl_and_nonsplat(<4 x i32> %a0) { ; SSE41: # %bb.0: ; SSE41-NEXT: pxor %xmm1, %xmm1 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3],xmm1[4],xmm0[5],xmm1[6],xmm0[7] -; SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [4,8,16,32] ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; SSE41-NEXT: retq ; diff --git a/llvm/test/CodeGen/X86/combine-srem.ll b/llvm/test/CodeGen/X86/combine-srem.ll index 4b01c16..0ca79ad 100644 --- a/llvm/test/CodeGen/X86/combine-srem.ll +++ b/llvm/test/CodeGen/X86/combine-srem.ll @@ -272,7 +272,7 @@ define <4 x i32> @combine_vec_srem_by_pow2b(<4 x i32> %x) { ; SSE-NEXT: psrad $2, %xmm2 ; SSE-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm3[2,3],xmm2[4,5],xmm3[6,7] ; SSE-NEXT: pblendw {{.*#+}} xmm2 = xmm0[0,1],xmm2[2,3,4,5,6,7] -; SSE-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 +; SSE-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [1,2,4,8] ; SSE-NEXT: psubd %xmm2, %xmm0 ; SSE-NEXT: retq ; @@ -291,7 +291,7 @@ define <4 x i32> @combine_vec_srem_by_pow2b(<4 x i32> %x) { ; AVX1-NEXT: vpsrad $2, %xmm1, %xmm1 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7] ; 
AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3,4,5,6,7] -; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [1,2,4,8] ; AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: retq ; @@ -336,7 +336,7 @@ define <4 x i32> @combine_vec_srem_by_pow2b_neg(<4 x i32> %x) { ; SSE-NEXT: psrld $1, %xmm1 ; SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7] ; SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7] -; SSE-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; SSE-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [4294967294,4294967292,4294967288,4294967280] ; SSE-NEXT: paddd %xmm1, %xmm0 ; SSE-NEXT: retq ; @@ -358,7 +358,7 @@ define <4 x i32> @combine_vec_srem_by_pow2b_neg(<4 x i32> %x) { ; AVX1-NEXT: vpsrld $1, %xmm1, %xmm1 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5,6,7] ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7] -; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [4294967294,4294967292,4294967288,4294967280] ; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: retq ; @@ -368,7 +368,7 @@ define <4 x i32> @combine_vec_srem_by_pow2b_neg(<4 x i32> %x) { ; AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 ; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm1 ; AVX2-NEXT: vpsravd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [4294967294,4294967292,4294967288,4294967280] ; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq %1 = srem <4 x i32> %x, <i32 -2, i32 -4, i32 -8, i32 -16> diff --git a/llvm/test/CodeGen/X86/combine-udiv.ll b/llvm/test/CodeGen/X86/combine-udiv.ll index 5571519..233735d 100644 --- a/llvm/test/CodeGen/X86/combine-udiv.ll +++ b/llvm/test/CodeGen/X86/combine-udiv.ll @@ -502,11 +502,11 @@ define <8 x i16> @combine_vec_udiv_nonuniform(<8 x i16> %x) { ; SSE2-NEXT: por %xmm2, %xmm1 ; SSE2-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [25645,61681,8195,9363,512,32769,32897,2] ; SSE2-NEXT: psubw %xmm1, %xmm0 -; SSE2-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE2-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [32768,0,0,0] ; SSE2-NEXT: paddw %xmm1, %xmm0 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [65535,65535,65535,0,0,65535,65535,0] ; SSE2-NEXT: pandn %xmm0, %xmm1 -; SSE2-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE2-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [0,16,0,8,8,0,0,0,0,0,2,0,2,0,0,0] ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; @@ -517,7 +517,7 @@ define <8 x i16> @combine_vec_udiv_nonuniform(<8 x i16> %x) { ; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7] ; SSE41-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [25645,61681,8195,9363,512,32769,32897,2] ; SSE41-NEXT: psubw %xmm1, %xmm0 -; SSE41-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE41-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [32768,0,0,0] ; SSE41-NEXT: paddw %xmm1, %xmm0 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [4096,2048,8,u,u,2,2,u] ; SSE41-NEXT: pmulhuw %xmm0, %xmm1 @@ -530,7 +530,7 @@ define <8 x i16> @combine_vec_udiv_nonuniform(<8 x i16> %x) { ; AVX-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7] ; AVX-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # 
[25645,61681,8195,9363,512,32769,32897,2] ; AVX-NEXT: vpsubw %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [32768,0,0,0] ; AVX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 # [4096,2048,8,u,u,2,2,u] ; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3,4],xmm1[5,6],xmm0[7] @@ -541,7 +541,7 @@ define <8 x i16> @combine_vec_udiv_nonuniform(<8 x i16> %x) { ; XOP-NEXT: vpshlw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 ; XOP-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [25645,61681,8195,9363,512,32769,32897,2] ; XOP-NEXT: vpsubw %xmm1, %xmm0, %xmm0 -; XOP-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; XOP-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [32768,0,0,0] ; XOP-NEXT: vpaddw %xmm1, %xmm0, %xmm0 ; XOP-NEXT: vpshlw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; XOP-NEXT: retq @@ -630,7 +630,7 @@ define <16 x i8> @combine_vec_udiv_nonuniform4(<16 x i8> %x) { ; SSE2-NEXT: pand %xmm1, %xmm2 ; SSE2-NEXT: pxor %xmm3, %xmm3 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7] -; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [171,0,0,0] ; SSE2-NEXT: psrlw $15, %xmm0 ; SSE2-NEXT: pandn %xmm0, %xmm1 ; SSE2-NEXT: por %xmm2, %xmm1 @@ -641,7 +641,7 @@ define <16 x i8> @combine_vec_udiv_nonuniform4(<16 x i8> %x) { ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa %xmm0, %xmm1 ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero -; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 +; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [171,0,0,0] ; SSE41-NEXT: psrlw $8, %xmm2 ; SSE41-NEXT: packuswb %xmm2, %xmm2 ; SSE41-NEXT: psrlw $7, %xmm2 @@ -654,7 +654,7 @@ define <16 x i8> @combine_vec_udiv_nonuniform4(<16 x i8> %x) { ; AVX-LABEL: combine_vec_udiv_nonuniform4: ; AVX: # %bb.0: ; AVX-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero -; AVX-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [171,0,0,0] ; AVX-NEXT: vpsrlw $8, %xmm1, %xmm1 ; AVX-NEXT: vpackuswb %xmm1, %xmm1, %xmm1 ; AVX-NEXT: vpsrlw $7, %xmm1, %xmm1 @@ -665,14 +665,12 @@ define <16 x i8> @combine_vec_udiv_nonuniform4(<16 x i8> %x) { ; ; XOP-LABEL: combine_vec_udiv_nonuniform4: ; XOP: # %bb.0: -; XOP-NEXT: movl $171, %eax +; XOP-NEXT: movl $249, %eax ; XOP-NEXT: vmovd %eax, %xmm1 ; XOP-NEXT: vpmovzxbw {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero -; XOP-NEXT: vpmullw %xmm1, %xmm2, %xmm1 -; XOP-NEXT: vpsrlw $8, %xmm1, %xmm1 -; XOP-NEXT: movl $249, %eax -; XOP-NEXT: vmovd %eax, %xmm2 -; XOP-NEXT: vpshlb %xmm2, %xmm1, %xmm1 +; XOP-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 # [171,0,0,0] +; XOP-NEXT: vpsrlw $8, %xmm2, %xmm2 +; XOP-NEXT: vpshlb %xmm1, %xmm2, %xmm1 ; XOP-NEXT: vpmovsxwq {{.*#+}} xmm2 = [18446744073709551360,18446744073709551615] ; XOP-NEXT: vpblendvb %xmm2, %xmm0, %xmm1, %xmm0 ; XOP-NEXT: retq @@ -691,7 +689,7 @@ define <8 x i16> @pr38477(<8 x i16> %a0) { ; SSE2-NEXT: psubw %xmm3, %xmm0 ; 
SSE2-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [u,32768,0,0,0,0,0,32768] ; SSE2-NEXT: paddw %xmm3, %xmm0 -; SSE2-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE2-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [u,u,0,4,0,4,16,0,4,0,0,4,0,0,0,16] ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 ; SSE2-NEXT: por %xmm3, %xmm0 ; SSE2-NEXT: pand %xmm1, %xmm0 diff --git a/llvm/test/CodeGen/X86/combine-urem.ll b/llvm/test/CodeGen/X86/combine-urem.ll index 715d5c7..34c7d3d 100644 --- a/llvm/test/CodeGen/X86/combine-urem.ll +++ b/llvm/test/CodeGen/X86/combine-urem.ll @@ -327,7 +327,7 @@ define <4 x i32> @combine_vec_urem_by_shl_pow2b(<4 x i32> %x, <4 x i32> %y) { ; SSE-NEXT: pslld $23, %xmm1 ; SSE-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 ; SSE-NEXT: cvttps2dq %xmm1, %xmm1 -; SSE-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; SSE-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [1,4,8,16] ; SSE-NEXT: pcmpeqd %xmm2, %xmm2 ; SSE-NEXT: paddd %xmm1, %xmm2 ; SSE-NEXT: pand %xmm2, %xmm0 @@ -338,7 +338,7 @@ define <4 x i32> @combine_vec_urem_by_shl_pow2b(<4 x i32> %x, <4 x i32> %y) { ; AVX1-NEXT: vpslld $23, %xmm1, %xmm1 ; AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 ; AVX1-NEXT: vcvttps2dq %xmm1, %xmm1 -; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [1,4,8,16] ; AVX1-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 ; AVX1-NEXT: vpaddd %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 diff --git a/llvm/test/CodeGen/X86/cpus-intel.ll b/llvm/test/CodeGen/X86/cpus-intel.ll index 40c38c2..646629d 100644 --- a/llvm/test/CodeGen/X86/cpus-intel.ll +++ b/llvm/test/CodeGen/X86/cpus-intel.ll @@ -38,6 +38,8 @@ ; RUN: llc < %s -o /dev/null -mtriple=i686-unknown-unknown -mcpu=lunarlake 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty ; RUN: llc < %s -o /dev/null -mtriple=i686-unknown-unknown -mcpu=gracemont 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty ; RUN: llc < %s -o /dev/null -mtriple=i686-unknown-unknown -mcpu=pantherlake 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty +; RUN: llc < %s -o /dev/null -mtriple=i686-unknown-unknown -mcpu=wildcatlake 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty +; RUN: llc < %s -o /dev/null -mtriple=i686-unknown-unknown -mcpu=novalake 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty ; RUN: llc < %s -o /dev/null -mtriple=i686-unknown-unknown -mcpu=clearwaterforest 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty ; RUN: llc < %s -o /dev/null -mtriple=i686-unknown-unknown -mcpu=diamondrapids 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty @@ -104,6 +106,8 @@ ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=lunarlake 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=gracemont 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=pantherlake 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty +; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=wildcatlake 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty +; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=novalake 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown 
-mcpu=clearwaterforest 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=diamondrapids 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty diff --git a/llvm/test/CodeGen/X86/dagcombine-shifts.ll b/llvm/test/CodeGen/X86/dagcombine-shifts.ll index 345b2b9..19b9452 100644 --- a/llvm/test/CodeGen/X86/dagcombine-shifts.ll +++ b/llvm/test/CodeGen/X86/dagcombine-shifts.ll @@ -437,9 +437,9 @@ define <4 x i32> @shift_zext_shl2_vec(<4 x i8> %x) nounwind { ; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] ; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] ; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] -; X64-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; X64-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [512,256,128,64] ; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; X64-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; X64-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [256,u,64,u] ; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] ; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; X64-NEXT: retq diff --git a/llvm/test/CodeGen/X86/fma_patterns.ll b/llvm/test/CodeGen/X86/fma_patterns.ll index be5e23c..d16522e 100644 --- a/llvm/test/CodeGen/X86/fma_patterns.ll +++ b/llvm/test/CodeGen/X86/fma_patterns.ll @@ -3,10 +3,6 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4,+fma | FileCheck %s --check-prefixes=FMA4,FMA4-INFS ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4 | FileCheck %s --check-prefixes=FMA4,FMA4-INFS ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl | FileCheck %s --check-prefixes=AVX512,AVX512-INFS -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma -enable-no-infs-fp-math | FileCheck %s --check-prefixes=FMA,FMA-NOINFS -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4,+fma -enable-no-infs-fp-math | FileCheck %s --check-prefixes=FMA4,FMA4-NOINFS -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4 -enable-no-infs-fp-math | FileCheck %s --check-prefixes=FMA4,FMA4-NOINFS -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl -enable-no-infs-fp-math | FileCheck %s --check-prefixes=AVX512,AVX512-NOINFS ; ; Pattern: (fadd (fmul x, y), z) -> (fmadd x,y,z) @@ -561,753 +557,834 @@ define <2 x double> @test_2f64_fmsub_load(ptr %a0, <2 x double> %a1, <2 x double ; define <4 x float> @test_v4f32_mul_add_x_one_y(<4 x float> %x, <4 x float> %y) { -; FMA-INFS-LABEL: test_v4f32_mul_add_x_one_y: -; FMA-INFS: # %bb.0: -; FMA-INFS-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; FMA-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0 -; FMA-INFS-NEXT: retq -; -; FMA4-INFS-LABEL: test_v4f32_mul_add_x_one_y: -; FMA4-INFS: # %bb.0: -; FMA4-INFS-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; FMA4-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0 -; FMA4-INFS-NEXT: retq -; -; AVX512-INFS-LABEL: test_v4f32_mul_add_x_one_y: -; AVX512-INFS: # %bb.0: -; AVX512-INFS-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0 -; AVX512-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0 -; AVX512-INFS-NEXT: retq -; -; FMA-NOINFS-LABEL: test_v4f32_mul_add_x_one_y: -; FMA-NOINFS: # %bb.0: -; FMA-NOINFS-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm1 -; FMA-NOINFS-NEXT: retq -; -; 
FMA4-NOINFS-LABEL: test_v4f32_mul_add_x_one_y: -; FMA4-NOINFS: # %bb.0: -; FMA4-NOINFS-NEXT: vfmaddps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 -; FMA4-NOINFS-NEXT: retq -; -; AVX512-NOINFS-LABEL: test_v4f32_mul_add_x_one_y: -; AVX512-NOINFS: # %bb.0: -; AVX512-NOINFS-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm1 -; AVX512-NOINFS-NEXT: retq +; FMA-LABEL: test_v4f32_mul_add_x_one_y: +; FMA: # %bb.0: +; FMA-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; FMA-NEXT: vmulps %xmm1, %xmm0, %xmm0 +; FMA-NEXT: retq +; +; FMA4-LABEL: test_v4f32_mul_add_x_one_y: +; FMA4: # %bb.0: +; FMA4-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; FMA4-NEXT: vmulps %xmm1, %xmm0, %xmm0 +; FMA4-NEXT: retq +; +; AVX512-LABEL: test_v4f32_mul_add_x_one_y: +; AVX512: # %bb.0: +; AVX512-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0 +; AVX512-NEXT: vmulps %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: retq %a = fadd contract <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0> %m = fmul contract <4 x float> %a, %y ret <4 x float> %m } +define <4 x float> @test_v4f32_mul_add_x_one_y_ninf(<4 x float> %x, <4 x float> %y) { +; FMA-LABEL: test_v4f32_mul_add_x_one_y_ninf: +; FMA: # %bb.0: +; FMA-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm1 +; FMA-NEXT: retq +; +; FMA4-LABEL: test_v4f32_mul_add_x_one_y_ninf: +; FMA4: # %bb.0: +; FMA4-NEXT: vfmaddps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 +; FMA4-NEXT: retq +; +; AVX512-LABEL: test_v4f32_mul_add_x_one_y_ninf: +; AVX512: # %bb.0: +; AVX512-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm1 +; AVX512-NEXT: retq + %a = fadd contract ninf <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0> + %m = fmul contract ninf <4 x float> %a, %y + ret <4 x float> %m +} + define <4 x float> @test_v4f32_mul_y_add_x_one(<4 x float> %x, <4 x float> %y) { -; FMA-INFS-LABEL: test_v4f32_mul_y_add_x_one: -; FMA-INFS: # %bb.0: -; FMA-INFS-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; FMA-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0 -; FMA-INFS-NEXT: retq -; -; FMA4-INFS-LABEL: test_v4f32_mul_y_add_x_one: -; FMA4-INFS: # %bb.0: -; FMA4-INFS-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; FMA4-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0 -; FMA4-INFS-NEXT: retq -; -; AVX512-INFS-LABEL: test_v4f32_mul_y_add_x_one: -; AVX512-INFS: # %bb.0: -; AVX512-INFS-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0 -; AVX512-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0 -; AVX512-INFS-NEXT: retq -; -; FMA-NOINFS-LABEL: test_v4f32_mul_y_add_x_one: -; FMA-NOINFS: # %bb.0: -; FMA-NOINFS-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm1 -; FMA-NOINFS-NEXT: retq -; -; FMA4-NOINFS-LABEL: test_v4f32_mul_y_add_x_one: -; FMA4-NOINFS: # %bb.0: -; FMA4-NOINFS-NEXT: vfmaddps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 -; FMA4-NOINFS-NEXT: retq -; -; AVX512-NOINFS-LABEL: test_v4f32_mul_y_add_x_one: -; AVX512-NOINFS: # %bb.0: -; AVX512-NOINFS-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm1 -; AVX512-NOINFS-NEXT: retq +; FMA-LABEL: test_v4f32_mul_y_add_x_one: +; FMA: # %bb.0: +; FMA-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; FMA-NEXT: vmulps %xmm0, %xmm1, %xmm0 +; FMA-NEXT: retq +; +; FMA4-LABEL: test_v4f32_mul_y_add_x_one: +; FMA4: # %bb.0: +; FMA4-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; FMA4-NEXT: vmulps %xmm0, %xmm1, %xmm0 +; FMA4-NEXT: retq +; +; AVX512-LABEL: test_v4f32_mul_y_add_x_one: +; AVX512: # %bb.0: +; AVX512-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0 +; AVX512-NEXT: vmulps 
%xmm0, %xmm1, %xmm0 +; AVX512-NEXT: retq %a = fadd contract <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0> %m = fmul contract <4 x float> %y, %a ret <4 x float> %m } -define <4 x float> @test_v4f32_mul_y_add_x_one_undefs(<4 x float> %x, <4 x float> %y) { -; FMA-INFS-LABEL: test_v4f32_mul_y_add_x_one_undefs: -; FMA-INFS: # %bb.0: -; FMA-INFS-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; FMA-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0 -; FMA-INFS-NEXT: retq -; -; FMA4-INFS-LABEL: test_v4f32_mul_y_add_x_one_undefs: -; FMA4-INFS: # %bb.0: -; FMA4-INFS-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; FMA4-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0 -; FMA4-INFS-NEXT: retq -; -; AVX512-INFS-LABEL: test_v4f32_mul_y_add_x_one_undefs: -; AVX512-INFS: # %bb.0: -; AVX512-INFS-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0 -; AVX512-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0 -; AVX512-INFS-NEXT: retq -; -; FMA-NOINFS-LABEL: test_v4f32_mul_y_add_x_one_undefs: -; FMA-NOINFS: # %bb.0: -; FMA-NOINFS-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm1 -; FMA-NOINFS-NEXT: retq -; -; FMA4-NOINFS-LABEL: test_v4f32_mul_y_add_x_one_undefs: -; FMA4-NOINFS: # %bb.0: -; FMA4-NOINFS-NEXT: vfmaddps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 -; FMA4-NOINFS-NEXT: retq -; -; AVX512-NOINFS-LABEL: test_v4f32_mul_y_add_x_one_undefs: -; AVX512-NOINFS: # %bb.0: -; AVX512-NOINFS-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm1 -; AVX512-NOINFS-NEXT: retq - %a = fadd contract <4 x float> %x, <float 1.0, float undef, float 1.0, float undef> +define <4 x float> @test_v4f32_mul_y_add_x_one_ninf(<4 x float> %x, <4 x float> %y) { +; FMA-LABEL: test_v4f32_mul_y_add_x_one_ninf: +; FMA: # %bb.0: +; FMA-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm1 +; FMA-NEXT: retq +; +; FMA4-LABEL: test_v4f32_mul_y_add_x_one_ninf: +; FMA4: # %bb.0: +; FMA4-NEXT: vfmaddps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 +; FMA4-NEXT: retq +; +; AVX512-LABEL: test_v4f32_mul_y_add_x_one_ninf: +; AVX512: # %bb.0: +; AVX512-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm1 +; AVX512-NEXT: retq + %a = fadd contract ninf <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0> + %m = fmul contract ninf <4 x float> %y, %a + ret <4 x float> %m +} + +define <4 x float> @test_v4f32_mul_y_add_x_one_poisons(<4 x float> %x, <4 x float> %y) { +; FMA-LABEL: test_v4f32_mul_y_add_x_one_poisons: +; FMA: # %bb.0: +; FMA-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; FMA-NEXT: vmulps %xmm0, %xmm1, %xmm0 +; FMA-NEXT: retq +; +; FMA4-LABEL: test_v4f32_mul_y_add_x_one_poisons: +; FMA4: # %bb.0: +; FMA4-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; FMA4-NEXT: vmulps %xmm0, %xmm1, %xmm0 +; FMA4-NEXT: retq +; +; AVX512-LABEL: test_v4f32_mul_y_add_x_one_poisons: +; AVX512: # %bb.0: +; AVX512-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0 +; AVX512-NEXT: vmulps %xmm0, %xmm1, %xmm0 +; AVX512-NEXT: retq + %a = fadd contract <4 x float> %x, <float 1.0, float poison, float 1.0, float poison> %m = fmul contract <4 x float> %y, %a ret <4 x float> %m } +define <4 x float> @test_v4f32_mul_y_add_x_one_poisons_ninf(<4 x float> %x, <4 x float> %y) { +; FMA-LABEL: test_v4f32_mul_y_add_x_one_poisons_ninf: +; FMA: # %bb.0: +; FMA-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm1 +; FMA-NEXT: retq +; +; FMA4-LABEL: test_v4f32_mul_y_add_x_one_poisons_ninf: +; FMA4: # %bb.0: +; FMA4-NEXT: vfmaddps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 +; FMA4-NEXT: retq +; +; AVX512-LABEL: 
test_v4f32_mul_y_add_x_one_poisons_ninf: +; AVX512: # %bb.0: +; AVX512-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm1 +; AVX512-NEXT: retq + %a = fadd contract ninf <4 x float> %x, <float 1.0, float poison, float 1.0, float poison> + %m = fmul contract ninf <4 x float> %y, %a + ret <4 x float> %m +} + define <4 x float> @test_v4f32_mul_add_x_negone_y(<4 x float> %x, <4 x float> %y) { -; FMA-INFS-LABEL: test_v4f32_mul_add_x_negone_y: -; FMA-INFS: # %bb.0: -; FMA-INFS-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; FMA-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0 -; FMA-INFS-NEXT: retq -; -; FMA4-INFS-LABEL: test_v4f32_mul_add_x_negone_y: -; FMA4-INFS: # %bb.0: -; FMA4-INFS-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; FMA4-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0 -; FMA4-INFS-NEXT: retq -; -; AVX512-INFS-LABEL: test_v4f32_mul_add_x_negone_y: -; AVX512-INFS: # %bb.0: -; AVX512-INFS-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0 -; AVX512-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0 -; AVX512-INFS-NEXT: retq -; -; FMA-NOINFS-LABEL: test_v4f32_mul_add_x_negone_y: -; FMA-NOINFS: # %bb.0: -; FMA-NOINFS-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm1 -; FMA-NOINFS-NEXT: retq -; -; FMA4-NOINFS-LABEL: test_v4f32_mul_add_x_negone_y: -; FMA4-NOINFS: # %bb.0: -; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm1 -; FMA4-NOINFS-NEXT: retq -; -; AVX512-NOINFS-LABEL: test_v4f32_mul_add_x_negone_y: -; AVX512-NOINFS: # %bb.0: -; AVX512-NOINFS-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm1 -; AVX512-NOINFS-NEXT: retq +; FMA-LABEL: test_v4f32_mul_add_x_negone_y: +; FMA: # %bb.0: +; FMA-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; FMA-NEXT: vmulps %xmm1, %xmm0, %xmm0 +; FMA-NEXT: retq +; +; FMA4-LABEL: test_v4f32_mul_add_x_negone_y: +; FMA4: # %bb.0: +; FMA4-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; FMA4-NEXT: vmulps %xmm1, %xmm0, %xmm0 +; FMA4-NEXT: retq +; +; AVX512-LABEL: test_v4f32_mul_add_x_negone_y: +; AVX512: # %bb.0: +; AVX512-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0 +; AVX512-NEXT: vmulps %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: retq %a = fadd contract <4 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0> %m = fmul contract <4 x float> %a, %y ret <4 x float> %m } +define <4 x float> @test_v4f32_mul_add_x_negone_y_ninf(<4 x float> %x, <4 x float> %y) { +; FMA-LABEL: test_v4f32_mul_add_x_negone_y_ninf: +; FMA: # %bb.0: +; FMA-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm1 +; FMA-NEXT: retq +; +; FMA4-LABEL: test_v4f32_mul_add_x_negone_y_ninf: +; FMA4: # %bb.0: +; FMA4-NEXT: vfmsubps {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm1 +; FMA4-NEXT: retq +; +; AVX512-LABEL: test_v4f32_mul_add_x_negone_y_ninf: +; AVX512: # %bb.0: +; AVX512-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm1 +; AVX512-NEXT: retq + %a = fadd contract ninf <4 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0> + %m = fmul contract ninf <4 x float> %a, %y + ret <4 x float> %m +} + define <4 x float> @test_v4f32_mul_y_add_x_negone(<4 x float> %x, <4 x float> %y) { -; FMA-INFS-LABEL: test_v4f32_mul_y_add_x_negone: -; FMA-INFS: # %bb.0: -; FMA-INFS-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; FMA-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0 -; FMA-INFS-NEXT: retq -; -; FMA4-INFS-LABEL: test_v4f32_mul_y_add_x_negone: -; FMA4-INFS: # %bb.0: -; FMA4-INFS-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; FMA4-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0 -; FMA4-INFS-NEXT: retq -; -;
AVX512-INFS-LABEL: test_v4f32_mul_y_add_x_negone: -; AVX512-INFS: # %bb.0: -; AVX512-INFS-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0 -; AVX512-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0 -; AVX512-INFS-NEXT: retq -; -; FMA-NOINFS-LABEL: test_v4f32_mul_y_add_x_negone: -; FMA-NOINFS: # %bb.0: -; FMA-NOINFS-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm1 -; FMA-NOINFS-NEXT: retq -; -; FMA4-NOINFS-LABEL: test_v4f32_mul_y_add_x_negone: -; FMA4-NOINFS: # %bb.0: -; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm1 -; FMA4-NOINFS-NEXT: retq -; -; AVX512-NOINFS-LABEL: test_v4f32_mul_y_add_x_negone: -; AVX512-NOINFS: # %bb.0: -; AVX512-NOINFS-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm1 -; AVX512-NOINFS-NEXT: retq +; FMA-LABEL: test_v4f32_mul_y_add_x_negone: +; FMA: # %bb.0: +; FMA-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; FMA-NEXT: vmulps %xmm0, %xmm1, %xmm0 +; FMA-NEXT: retq +; +; FMA4-LABEL: test_v4f32_mul_y_add_x_negone: +; FMA4: # %bb.0: +; FMA4-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; FMA4-NEXT: vmulps %xmm0, %xmm1, %xmm0 +; FMA4-NEXT: retq +; +; AVX512-LABEL: test_v4f32_mul_y_add_x_negone: +; AVX512: # %bb.0: +; AVX512-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0 +; AVX512-NEXT: vmulps %xmm0, %xmm1, %xmm0 +; AVX512-NEXT: retq %a = fadd contract <4 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0> %m = fmul contract <4 x float> %y, %a ret <4 x float> %m } -define <4 x float> @test_v4f32_mul_y_add_x_negone_undefs(<4 x float> %x, <4 x float> %y) { -; FMA-INFS-LABEL: test_v4f32_mul_y_add_x_negone_undefs: -; FMA-INFS: # %bb.0: -; FMA-INFS-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; FMA-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0 -; FMA-INFS-NEXT: retq -; -; FMA4-INFS-LABEL: test_v4f32_mul_y_add_x_negone_undefs: -; FMA4-INFS: # %bb.0: -; FMA4-INFS-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; FMA4-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0 -; FMA4-INFS-NEXT: retq -; -; AVX512-INFS-LABEL: test_v4f32_mul_y_add_x_negone_undefs: -; AVX512-INFS: # %bb.0: -; AVX512-INFS-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0 -; AVX512-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0 -; AVX512-INFS-NEXT: retq -; -; FMA-NOINFS-LABEL: test_v4f32_mul_y_add_x_negone_undefs: -; FMA-NOINFS: # %bb.0: -; FMA-NOINFS-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm1 -; FMA-NOINFS-NEXT: retq -; -; FMA4-NOINFS-LABEL: test_v4f32_mul_y_add_x_negone_undefs: -; FMA4-NOINFS: # %bb.0: -; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm1 -; FMA4-NOINFS-NEXT: retq -; -; AVX512-NOINFS-LABEL: test_v4f32_mul_y_add_x_negone_undefs: -; AVX512-NOINFS: # %bb.0: -; AVX512-NOINFS-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm1 -; AVX512-NOINFS-NEXT: retq - %a = fadd contract <4 x float> %x, <float undef, float -1.0, float undef, float -1.0> +define <4 x float> @test_v4f32_mul_y_add_x_negone_ninf(<4 x float> %x, <4 x float> %y) { +; FMA-LABEL: test_v4f32_mul_y_add_x_negone_ninf: +; FMA: # %bb.0: +; FMA-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm1 +; FMA-NEXT: retq +; +; FMA4-LABEL: test_v4f32_mul_y_add_x_negone_ninf: +; FMA4: # %bb.0: +; FMA4-NEXT: vfmsubps {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm1 +; FMA4-NEXT: retq +; +; AVX512-LABEL: test_v4f32_mul_y_add_x_negone_ninf: +; AVX512: # %bb.0: +; AVX512-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm1 +; AVX512-NEXT: retq + %a = fadd contract ninf <4 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0> + %m =
fmul contract ninf <4 x float> %y, %a + ret <4 x float> %m +} + +define <4 x float> @test_v4f32_mul_y_add_x_negone_poisons(<4 x float> %x, <4 x float> %y) { +; FMA-LABEL: test_v4f32_mul_y_add_x_negone_poisons: +; FMA: # %bb.0: +; FMA-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; FMA-NEXT: vmulps %xmm0, %xmm1, %xmm0 +; FMA-NEXT: retq +; +; FMA4-LABEL: test_v4f32_mul_y_add_x_negone_poisons: +; FMA4: # %bb.0: +; FMA4-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; FMA4-NEXT: vmulps %xmm0, %xmm1, %xmm0 +; FMA4-NEXT: retq +; +; AVX512-LABEL: test_v4f32_mul_y_add_x_negone_poisons: +; AVX512: # %bb.0: +; AVX512-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0 +; AVX512-NEXT: vmulps %xmm0, %xmm1, %xmm0 +; AVX512-NEXT: retq + %a = fadd contract <4 x float> %x, <float poison, float -1.0, float poison, float -1.0> %m = fmul contract <4 x float> %y, %a ret <4 x float> %m } +define <4 x float> @test_v4f32_mul_y_add_x_negone_poisons_ninf(<4 x float> %x, <4 x float> %y) { +; FMA-LABEL: test_v4f32_mul_y_add_x_negone_poisons_ninf: +; FMA: # %bb.0: +; FMA-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm1 +; FMA-NEXT: retq +; +; FMA4-LABEL: test_v4f32_mul_y_add_x_negone_poisons_ninf: +; FMA4: # %bb.0: +; FMA4-NEXT: vfmsubps {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm1 +; FMA4-NEXT: retq +; +; AVX512-LABEL: test_v4f32_mul_y_add_x_negone_poisons_ninf: +; AVX512: # %bb.0: +; AVX512-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm1 +; AVX512-NEXT: retq + %a = fadd contract ninf <4 x float> %x, <float poison, float -1.0, float poison, float -1.0> + %m = fmul contract ninf <4 x float> %y, %a + ret <4 x float> %m +} + define <4 x float> @test_v4f32_mul_sub_one_x_y(<4 x float> %x, <4 x float> %y) { -; FMA-INFS-LABEL: test_v4f32_mul_sub_one_x_y: -; FMA-INFS: # %bb.0: -; FMA-INFS-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] -; FMA-INFS-NEXT: vsubps %xmm0, %xmm2, %xmm0 -; FMA-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0 -; FMA-INFS-NEXT: retq -; -; FMA4-INFS-LABEL: test_v4f32_mul_sub_one_x_y: -; FMA4-INFS: # %bb.0: -; FMA4-INFS-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] -; FMA4-INFS-NEXT: vsubps %xmm0, %xmm2, %xmm0 -; FMA4-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0 -; FMA4-INFS-NEXT: retq -; -; AVX512-INFS-LABEL: test_v4f32_mul_sub_one_x_y: -; AVX512-INFS: # %bb.0: -; AVX512-INFS-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] -; AVX512-INFS-NEXT: vsubps %xmm0, %xmm2, %xmm0 -; AVX512-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0 -; AVX512-INFS-NEXT: retq -; -; FMA-NOINFS-LABEL: test_v4f32_mul_sub_one_x_y: -; FMA-NOINFS: # %bb.0: -; FMA-NOINFS-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm1 -; FMA-NOINFS-NEXT: retq -; -; FMA4-NOINFS-LABEL: test_v4f32_mul_sub_one_x_y: -; FMA4-NOINFS: # %bb.0: -; FMA4-NOINFS-NEXT: vfnmaddps {{.*#+}} xmm0 = -(xmm0 * xmm1) + xmm1 -; FMA4-NOINFS-NEXT: retq -; -; AVX512-NOINFS-LABEL: test_v4f32_mul_sub_one_x_y: -; AVX512-NOINFS: # %bb.0: -; AVX512-NOINFS-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm1 -; AVX512-NOINFS-NEXT: retq +; FMA-LABEL: test_v4f32_mul_sub_one_x_y: +; FMA: # %bb.0: +; FMA-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; FMA-NEXT: vsubps %xmm0, %xmm2, %xmm0 +; FMA-NEXT: vmulps %xmm1, %xmm0, %xmm0 +; FMA-NEXT: retq +; +; FMA4-LABEL: test_v4f32_mul_sub_one_x_y: +; FMA4: # %bb.0: +; FMA4-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; FMA4-NEXT: vsubps %xmm0, %xmm2, %xmm0 +; FMA4-NEXT: vmulps %xmm1, %xmm0, %xmm0 +; FMA4-NEXT: retq +; +;
AVX512-LABEL: test_v4f32_mul_sub_one_x_y: +; AVX512: # %bb.0: +; AVX512-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; AVX512-NEXT: vsubps %xmm0, %xmm2, %xmm0 +; AVX512-NEXT: vmulps %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: retq %s = fsub contract <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %x %m = fmul contract <4 x float> %s, %y ret <4 x float> %m } +define <4 x float> @test_v4f32_mul_sub_one_x_y_ninf(<4 x float> %x, <4 x float> %y) { +; FMA-LABEL: test_v4f32_mul_sub_one_x_y_ninf: +; FMA: # %bb.0: +; FMA-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; FMA-NEXT: vsubps %xmm0, %xmm2, %xmm0 +; FMA-NEXT: vmulps %xmm1, %xmm0, %xmm0 +; FMA-NEXT: retq +; +; FMA4-LABEL: test_v4f32_mul_sub_one_x_y_ninf: +; FMA4: # %bb.0: +; FMA4-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; FMA4-NEXT: vsubps %xmm0, %xmm2, %xmm0 +; FMA4-NEXT: vmulps %xmm1, %xmm0, %xmm0 +; FMA4-NEXT: retq +; +; AVX512-LABEL: test_v4f32_mul_sub_one_x_y_ninf: +; AVX512: # %bb.0: +; AVX512-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; AVX512-NEXT: vsubps %xmm0, %xmm2, %xmm0 +; AVX512-NEXT: vmulps %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: retq + %s = fsub contract ninf <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %x + %m = fmul contract ninf <4 x float> %s, %y + ret <4 x float> %m +} + define <4 x float> @test_v4f32_mul_y_sub_one_x(<4 x float> %x, <4 x float> %y) { -; FMA-INFS-LABEL: test_v4f32_mul_y_sub_one_x: -; FMA-INFS: # %bb.0: -; FMA-INFS-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] -; FMA-INFS-NEXT: vsubps %xmm0, %xmm2, %xmm0 -; FMA-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0 -; FMA-INFS-NEXT: retq -; -; FMA4-INFS-LABEL: test_v4f32_mul_y_sub_one_x: -; FMA4-INFS: # %bb.0: -; FMA4-INFS-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] -; FMA4-INFS-NEXT: vsubps %xmm0, %xmm2, %xmm0 -; FMA4-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0 -; FMA4-INFS-NEXT: retq -; -; AVX512-INFS-LABEL: test_v4f32_mul_y_sub_one_x: -; AVX512-INFS: # %bb.0: -; AVX512-INFS-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] -; AVX512-INFS-NEXT: vsubps %xmm0, %xmm2, %xmm0 -; AVX512-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0 -; AVX512-INFS-NEXT: retq -; -; FMA-NOINFS-LABEL: test_v4f32_mul_y_sub_one_x: -; FMA-NOINFS: # %bb.0: -; FMA-NOINFS-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm1 -; FMA-NOINFS-NEXT: retq -; -; FMA4-NOINFS-LABEL: test_v4f32_mul_y_sub_one_x: -; FMA4-NOINFS: # %bb.0: -; FMA4-NOINFS-NEXT: vfnmaddps {{.*#+}} xmm0 = -(xmm0 * xmm1) + xmm1 -; FMA4-NOINFS-NEXT: retq -; -; AVX512-NOINFS-LABEL: test_v4f32_mul_y_sub_one_x: -; AVX512-NOINFS: # %bb.0: -; AVX512-NOINFS-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm1 -; AVX512-NOINFS-NEXT: retq +; FMA-LABEL: test_v4f32_mul_y_sub_one_x: +; FMA: # %bb.0: +; FMA-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; FMA-NEXT: vsubps %xmm0, %xmm2, %xmm0 +; FMA-NEXT: vmulps %xmm0, %xmm1, %xmm0 +; FMA-NEXT: retq +; +; FMA4-LABEL: test_v4f32_mul_y_sub_one_x: +; FMA4: # %bb.0: +; FMA4-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; FMA4-NEXT: vsubps %xmm0, %xmm2, %xmm0 +; FMA4-NEXT: vmulps %xmm0, %xmm1, %xmm0 +; FMA4-NEXT: retq +; +; AVX512-LABEL: test_v4f32_mul_y_sub_one_x: +; AVX512: # %bb.0: +; AVX512-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; AVX512-NEXT: vsubps %xmm0, %xmm2, %xmm0 +; AVX512-NEXT: vmulps %xmm0, %xmm1, %xmm0 +; AVX512-NEXT: retq %s = fsub contract <4 x float> <float 1.0, float 1.0, float
1.0, float 1.0>, %x %m = fmul contract <4 x float> %y, %s ret <4 x float> %m } -define <4 x float> @test_v4f32_mul_y_sub_one_x_undefs(<4 x float> %x, <4 x float> %y) { -; FMA-INFS-LABEL: test_v4f32_mul_y_sub_one_x_undefs: -; FMA-INFS: # %bb.0: -; FMA-INFS-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] -; FMA-INFS-NEXT: vsubps %xmm0, %xmm2, %xmm0 -; FMA-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0 -; FMA-INFS-NEXT: retq -; -; FMA4-INFS-LABEL: test_v4f32_mul_y_sub_one_x_undefs: -; FMA4-INFS: # %bb.0: -; FMA4-INFS-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] -; FMA4-INFS-NEXT: vsubps %xmm0, %xmm2, %xmm0 -; FMA4-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0 -; FMA4-INFS-NEXT: retq -; -; AVX512-INFS-LABEL: test_v4f32_mul_y_sub_one_x_undefs: -; AVX512-INFS: # %bb.0: -; AVX512-INFS-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] -; AVX512-INFS-NEXT: vsubps %xmm0, %xmm2, %xmm0 -; AVX512-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0 -; AVX512-INFS-NEXT: retq -; -; FMA-NOINFS-LABEL: test_v4f32_mul_y_sub_one_x_undefs: -; FMA-NOINFS: # %bb.0: -; FMA-NOINFS-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm1 -; FMA-NOINFS-NEXT: retq -; -; FMA4-NOINFS-LABEL: test_v4f32_mul_y_sub_one_x_undefs: -; FMA4-NOINFS: # %bb.0: -; FMA4-NOINFS-NEXT: vfnmaddps {{.*#+}} xmm0 = -(xmm0 * xmm1) + xmm1 -; FMA4-NOINFS-NEXT: retq -; -; AVX512-NOINFS-LABEL: test_v4f32_mul_y_sub_one_x_undefs: -; AVX512-NOINFS: # %bb.0: -; AVX512-NOINFS-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm1 -; AVX512-NOINFS-NEXT: retq - %s = fsub contract <4 x float> <float 1.0, float undef, float 1.0, float 1.0>, %x +define <4 x float> @test_v4f32_mul_y_sub_one_x_ninf(<4 x float> %x, <4 x float> %y) { +; FMA-LABEL: test_v4f32_mul_y_sub_one_x_ninf: +; FMA: # %bb.0: +; FMA-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm1 +; FMA-NEXT: retq +; +; FMA4-LABEL: test_v4f32_mul_y_sub_one_x_ninf: +; FMA4: # %bb.0: +; FMA4-NEXT: vfnmaddps {{.*#+}} xmm0 = -(xmm0 * xmm1) + xmm1 +; FMA4-NEXT: retq +; +; AVX512-LABEL: test_v4f32_mul_y_sub_one_x_ninf: +; AVX512: # %bb.0: +; AVX512-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm1 +; AVX512-NEXT: retq + %s = fsub contract ninf <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %x + %m = fmul contract ninf <4 x float> %y, %s + ret <4 x float> %m +} + +define <4 x float> @test_v4f32_mul_y_sub_one_x_poisons(<4 x float> %x, <4 x float> %y) { +; FMA-LABEL: test_v4f32_mul_y_sub_one_x_poisons: +; FMA: # %bb.0: +; FMA-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; FMA-NEXT: vsubps %xmm0, %xmm2, %xmm0 +; FMA-NEXT: vmulps %xmm0, %xmm1, %xmm0 +; FMA-NEXT: retq +; +; FMA4-LABEL: test_v4f32_mul_y_sub_one_x_poisons: +; FMA4: # %bb.0: +; FMA4-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; FMA4-NEXT: vsubps %xmm0, %xmm2, %xmm0 +; FMA4-NEXT: vmulps %xmm0, %xmm1, %xmm0 +; FMA4-NEXT: retq +; +; AVX512-LABEL: test_v4f32_mul_y_sub_one_x_poisons: +; AVX512: # %bb.0: +; AVX512-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; AVX512-NEXT: vsubps %xmm0, %xmm2, %xmm0 +; AVX512-NEXT: vmulps %xmm0, %xmm1, %xmm0 +; AVX512-NEXT: retq + %s = fsub contract <4 x float> <float 1.0, float poison, float 1.0, float 1.0>, %x %m = fmul contract <4 x float> %y, %s ret <4 x float> %m } +define <4 x float> @test_v4f32_mul_y_sub_one_x_poisons_ninf(<4 x float> %x, <4 x float> %y) { +; FMA-LABEL: test_v4f32_mul_y_sub_one_x_poisons_ninf: +; FMA: # %bb.0: +; FMA-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm1 +; FMA-NEXT: retq
+; +; FMA4-LABEL: test_v4f32_mul_y_sub_one_x_poisons_ninf: +; FMA4: # %bb.0: +; FMA4-NEXT: vfnmaddps {{.*#+}} xmm0 = -(xmm0 * xmm1) + xmm1 +; FMA4-NEXT: retq +; +; AVX512-LABEL: test_v4f32_mul_y_sub_one_x_poisons_ninf: +; AVX512: # %bb.0: +; AVX512-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm1 +; AVX512-NEXT: retq + %s = fsub contract ninf <4 x float> <float 1.0, float poison, float 1.0, float 1.0>, %x + %m = fmul contract ninf <4 x float> %y, %s + ret <4 x float> %m +} + define <4 x float> @test_v4f32_mul_sub_negone_x_y(<4 x float> %x, <4 x float> %y) { -; FMA-INFS-LABEL: test_v4f32_mul_sub_negone_x_y: -; FMA-INFS: # %bb.0: -; FMA-INFS-NEXT: vbroadcastss {{.*#+}} xmm2 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0] -; FMA-INFS-NEXT: vsubps %xmm0, %xmm2, %xmm0 -; FMA-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0 -; FMA-INFS-NEXT: retq -; -; FMA4-INFS-LABEL: test_v4f32_mul_sub_negone_x_y: -; FMA4-INFS: # %bb.0: -; FMA4-INFS-NEXT: vbroadcastss {{.*#+}} xmm2 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0] -; FMA4-INFS-NEXT: vsubps %xmm0, %xmm2, %xmm0 -; FMA4-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0 -; FMA4-INFS-NEXT: retq -; -; AVX512-INFS-LABEL: test_v4f32_mul_sub_negone_x_y: -; AVX512-INFS: # %bb.0: -; AVX512-INFS-NEXT: vbroadcastss {{.*#+}} xmm2 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0] -; AVX512-INFS-NEXT: vsubps %xmm0, %xmm2, %xmm0 -; AVX512-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0 -; AVX512-INFS-NEXT: retq -; -; FMA-NOINFS-LABEL: test_v4f32_mul_sub_negone_x_y: -; FMA-NOINFS: # %bb.0: -; FMA-NOINFS-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm1 -; FMA-NOINFS-NEXT: retq -; -; FMA4-NOINFS-LABEL: test_v4f32_mul_sub_negone_x_y: -; FMA4-NOINFS: # %bb.0: -; FMA4-NOINFS-NEXT: vfnmsubps {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm1 -; FMA4-NOINFS-NEXT: retq -; -; AVX512-NOINFS-LABEL: test_v4f32_mul_sub_negone_x_y: -; AVX512-NOINFS: # %bb.0: -; AVX512-NOINFS-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm1 -; AVX512-NOINFS-NEXT: retq +; FMA-LABEL: test_v4f32_mul_sub_negone_x_y: +; FMA: # %bb.0: +; FMA-NEXT: vbroadcastss {{.*#+}} xmm2 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0] +; FMA-NEXT: vsubps %xmm0, %xmm2, %xmm0 +; FMA-NEXT: vmulps %xmm1, %xmm0, %xmm0 +; FMA-NEXT: retq +; +; FMA4-LABEL: test_v4f32_mul_sub_negone_x_y: +; FMA4: # %bb.0: +; FMA4-NEXT: vbroadcastss {{.*#+}} xmm2 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0] +; FMA4-NEXT: vsubps %xmm0, %xmm2, %xmm0 +; FMA4-NEXT: vmulps %xmm1, %xmm0, %xmm0 +; FMA4-NEXT: retq +; +; AVX512-LABEL: test_v4f32_mul_sub_negone_x_y: +; AVX512: # %bb.0: +; AVX512-NEXT: vbroadcastss {{.*#+}} xmm2 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0] +; AVX512-NEXT: vsubps %xmm0, %xmm2, %xmm0 +; AVX512-NEXT: vmulps %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: retq %s = fsub contract <4 x float> <float -1.0, float -1.0, float -1.0, float -1.0>, %x %m = fmul contract <4 x float> %s, %y ret <4 x float> %m } +define <4 x float> @test_v4f32_mul_sub_negone_x_y_ninf(<4 x float> %x, <4 x float> %y) { +; FMA-LABEL: test_v4f32_mul_sub_negone_x_y_ninf: +; FMA: # %bb.0: +; FMA-NEXT: vbroadcastss {{.*#+}} xmm2 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0] +; FMA-NEXT: vsubps %xmm0, %xmm2, %xmm0 +; FMA-NEXT: vmulps %xmm1, %xmm0, %xmm0 +; FMA-NEXT: retq +; +; FMA4-LABEL: test_v4f32_mul_sub_negone_x_y_ninf: +; FMA4: # %bb.0: +; FMA4-NEXT: vbroadcastss {{.*#+}} xmm2 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0] +; FMA4-NEXT: vsubps %xmm0, %xmm2, %xmm0 +; FMA4-NEXT: vmulps %xmm1, %xmm0, %xmm0 +; FMA4-NEXT: retq +; +; AVX512-LABEL: test_v4f32_mul_sub_negone_x_y_ninf: +; AVX512: # %bb.0: +; AVX512-NEXT: vbroadcastss {{.*#+}} xmm2 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
+; AVX512-NEXT: vsubps %xmm0, %xmm2, %xmm0 +; AVX512-NEXT: vmulps %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: retq + %s = fsub contract ninf <4 x float> <float -1.0, float -1.0, float -1.0, float -1.0>, %x + %m = fmul contract ninf <4 x float> %s, %y + ret <4 x float> %m +} + define <4 x float> @test_v4f32_mul_y_sub_negone_x(<4 x float> %x, <4 x float> %y) { -; FMA-INFS-LABEL: test_v4f32_mul_y_sub_negone_x: -; FMA-INFS: # %bb.0: -; FMA-INFS-NEXT: vbroadcastss {{.*#+}} xmm2 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0] -; FMA-INFS-NEXT: vsubps %xmm0, %xmm2, %xmm0 -; FMA-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0 -; FMA-INFS-NEXT: retq -; -; FMA4-INFS-LABEL: test_v4f32_mul_y_sub_negone_x: -; FMA4-INFS: # %bb.0: -; FMA4-INFS-NEXT: vbroadcastss {{.*#+}} xmm2 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0] -; FMA4-INFS-NEXT: vsubps %xmm0, %xmm2, %xmm0 -; FMA4-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0 -; FMA4-INFS-NEXT: retq -; -; AVX512-INFS-LABEL: test_v4f32_mul_y_sub_negone_x: -; AVX512-INFS: # %bb.0: -; AVX512-INFS-NEXT: vbroadcastss {{.*#+}} xmm2 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0] -; AVX512-INFS-NEXT: vsubps %xmm0, %xmm2, %xmm0 -; AVX512-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0 -; AVX512-INFS-NEXT: retq -; -; FMA-NOINFS-LABEL: test_v4f32_mul_y_sub_negone_x: -; FMA-NOINFS: # %bb.0: -; FMA-NOINFS-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm1 -; FMA-NOINFS-NEXT: retq -; -; FMA4-NOINFS-LABEL: test_v4f32_mul_y_sub_negone_x: -; FMA4-NOINFS: # %bb.0: -; FMA4-NOINFS-NEXT: vfnmsubps {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm1 -; FMA4-NOINFS-NEXT: retq -; -; AVX512-NOINFS-LABEL: test_v4f32_mul_y_sub_negone_x: -; AVX512-NOINFS: # %bb.0: -; AVX512-NOINFS-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm1 -; AVX512-NOINFS-NEXT: retq +; FMA-LABEL: test_v4f32_mul_y_sub_negone_x: +; FMA: # %bb.0: +; FMA-NEXT: vbroadcastss {{.*#+}} xmm2 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0] +; FMA-NEXT: vsubps %xmm0, %xmm2, %xmm0 +; FMA-NEXT: vmulps %xmm0, %xmm1, %xmm0 +; FMA-NEXT: retq +; +; FMA4-LABEL: test_v4f32_mul_y_sub_negone_x: +; FMA4: # %bb.0: +; FMA4-NEXT: vbroadcastss {{.*#+}} xmm2 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0] +; FMA4-NEXT: vsubps %xmm0, %xmm2, %xmm0 +; FMA4-NEXT: vmulps %xmm0, %xmm1, %xmm0 +; FMA4-NEXT: retq +; +; AVX512-LABEL: test_v4f32_mul_y_sub_negone_x: +; AVX512: # %bb.0: +; AVX512-NEXT: vbroadcastss {{.*#+}} xmm2 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0] +; AVX512-NEXT: vsubps %xmm0, %xmm2, %xmm0 +; AVX512-NEXT: vmulps %xmm0, %xmm1, %xmm0 +; AVX512-NEXT: retq %s = fsub contract <4 x float> <float -1.0, float -1.0, float -1.0, float -1.0>, %x %m = fmul contract <4 x float> %y, %s ret <4 x float> %m } -define <4 x float> @test_v4f32_mul_y_sub_negone_x_undefs(<4 x float> %x, <4 x float> %y) { -; FMA-INFS-LABEL: test_v4f32_mul_y_sub_negone_x_undefs: -; FMA-INFS: # %bb.0: -; FMA-INFS-NEXT: vbroadcastss {{.*#+}} xmm2 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0] -; FMA-INFS-NEXT: vsubps %xmm0, %xmm2, %xmm0 -; FMA-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0 -; FMA-INFS-NEXT: retq -; -; FMA4-INFS-LABEL: test_v4f32_mul_y_sub_negone_x_undefs: -; FMA4-INFS: # %bb.0: -; FMA4-INFS-NEXT: vbroadcastss {{.*#+}} xmm2 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0] -; FMA4-INFS-NEXT: vsubps %xmm0, %xmm2, %xmm0 -; FMA4-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0 -; FMA4-INFS-NEXT: retq -; -; AVX512-INFS-LABEL: test_v4f32_mul_y_sub_negone_x_undefs: -; AVX512-INFS: # %bb.0: -; AVX512-INFS-NEXT: vbroadcastss {{.*#+}} xmm2 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0] -; AVX512-INFS-NEXT: vsubps %xmm0, %xmm2, %xmm0 -; AVX512-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0 -; AVX512-INFS-NEXT: retq -; -;
FMA-NOINFS-LABEL: test_v4f32_mul_y_sub_negone_x_undefs: -; FMA-NOINFS: # %bb.0: -; FMA-NOINFS-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm1 -; FMA-NOINFS-NEXT: retq -; -; FMA4-NOINFS-LABEL: test_v4f32_mul_y_sub_negone_x_undefs: -; FMA4-NOINFS: # %bb.0: -; FMA4-NOINFS-NEXT: vfnmsubps {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm1 -; FMA4-NOINFS-NEXT: retq -; -; AVX512-NOINFS-LABEL: test_v4f32_mul_y_sub_negone_x_undefs: -; AVX512-NOINFS: # %bb.0: -; AVX512-NOINFS-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm1 -; AVX512-NOINFS-NEXT: retq - %s = fsub contract <4 x float> <float -1.0, float -1.0, float undef, float -1.0>, %x +define <4 x float> @test_v4f32_mul_y_sub_negone_x_ninf(<4 x float> %x, <4 x float> %y) { +; FMA-LABEL: test_v4f32_mul_y_sub_negone_x_ninf: +; FMA: # %bb.0: +; FMA-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm1 +; FMA-NEXT: retq +; +; FMA4-LABEL: test_v4f32_mul_y_sub_negone_x_ninf: +; FMA4: # %bb.0: +; FMA4-NEXT: vfnmsubps {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm1 +; FMA4-NEXT: retq +; +; AVX512-LABEL: test_v4f32_mul_y_sub_negone_x_ninf: +; AVX512: # %bb.0: +; AVX512-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm1 +; AVX512-NEXT: retq + %s = fsub contract ninf <4 x float> <float -1.0, float -1.0, float -1.0, float -1.0>, %x + %m = fmul contract ninf <4 x float> %y, %s + ret <4 x float> %m +} + +define <4 x float> @test_v4f32_mul_y_sub_negone_x_poisons(<4 x float> %x, <4 x float> %y) { +; FMA-LABEL: test_v4f32_mul_y_sub_negone_x_poisons: +; FMA: # %bb.0: +; FMA-NEXT: vbroadcastss {{.*#+}} xmm2 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0] +; FMA-NEXT: vsubps %xmm0, %xmm2, %xmm0 +; FMA-NEXT: vmulps %xmm0, %xmm1, %xmm0 +; FMA-NEXT: retq +; +; FMA4-LABEL: test_v4f32_mul_y_sub_negone_x_poisons: +; FMA4: # %bb.0: +; FMA4-NEXT: vbroadcastss {{.*#+}} xmm2 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0] +; FMA4-NEXT: vsubps %xmm0, %xmm2, %xmm0 +; FMA4-NEXT: vmulps %xmm0, %xmm1, %xmm0 +; FMA4-NEXT: retq +; +; AVX512-LABEL: test_v4f32_mul_y_sub_negone_x_poisons: +; AVX512: # %bb.0: +; AVX512-NEXT: vbroadcastss {{.*#+}} xmm2 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0] +; AVX512-NEXT: vsubps %xmm0, %xmm2, %xmm0 +; AVX512-NEXT: vmulps %xmm0, %xmm1, %xmm0 +; AVX512-NEXT: retq + %s = fsub contract <4 x float> <float -1.0, float -1.0, float poison, float -1.0>, %x %m = fmul contract <4 x float> %y, %s ret <4 x float> %m } +define <4 x float> @test_v4f32_mul_y_sub_negone_x_poisons_ninf(<4 x float> %x, <4 x float> %y) { +; FMA-LABEL: test_v4f32_mul_y_sub_negone_x_poisons_ninf: +; FMA: # %bb.0: +; FMA-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm1 +; FMA-NEXT: retq +; +; FMA4-LABEL: test_v4f32_mul_y_sub_negone_x_poisons_ninf: +; FMA4: # %bb.0: +; FMA4-NEXT: vfnmsubps {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm1 +; FMA4-NEXT: retq +; +; AVX512-LABEL: test_v4f32_mul_y_sub_negone_x_poisons_ninf: +; AVX512: # %bb.0: +; AVX512-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm1 +; AVX512-NEXT: retq + %s = fsub contract ninf <4 x float> <float -1.0, float -1.0, float poison, float -1.0>, %x + %m = fmul contract ninf <4 x float> %y, %s + ret <4 x float> %m +} + define <4 x float> @test_v4f32_mul_sub_x_one_y(<4 x float> %x, <4 x float> %y) { -; FMA-INFS-LABEL: test_v4f32_mul_sub_x_one_y: -; FMA-INFS: # %bb.0: -; FMA-INFS-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; FMA-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0 -; FMA-INFS-NEXT: retq -; -; FMA4-INFS-LABEL: test_v4f32_mul_sub_x_one_y: -; FMA4-INFS: # %bb.0: -; FMA4-INFS-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -;
FMA4-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0 -; FMA4-INFS-NEXT: retq -; -; AVX512-INFS-LABEL: test_v4f32_mul_sub_x_one_y: -; AVX512-INFS: # %bb.0: -; AVX512-INFS-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0 -; AVX512-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0 -; AVX512-INFS-NEXT: retq -; -; FMA-NOINFS-LABEL: test_v4f32_mul_sub_x_one_y: -; FMA-NOINFS: # %bb.0: -; FMA-NOINFS-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm1 -; FMA-NOINFS-NEXT: retq -; -; FMA4-NOINFS-LABEL: test_v4f32_mul_sub_x_one_y: -; FMA4-NOINFS: # %bb.0: -; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm1 -; FMA4-NOINFS-NEXT: retq -; -; AVX512-NOINFS-LABEL: test_v4f32_mul_sub_x_one_y: -; AVX512-NOINFS: # %bb.0: -; AVX512-NOINFS-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm1 -; AVX512-NOINFS-NEXT: retq +; FMA-LABEL: test_v4f32_mul_sub_x_one_y: +; FMA: # %bb.0: +; FMA-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; FMA-NEXT: vmulps %xmm1, %xmm0, %xmm0 +; FMA-NEXT: retq +; +; FMA4-LABEL: test_v4f32_mul_sub_x_one_y: +; FMA4: # %bb.0: +; FMA4-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; FMA4-NEXT: vmulps %xmm1, %xmm0, %xmm0 +; FMA4-NEXT: retq +; +; AVX512-LABEL: test_v4f32_mul_sub_x_one_y: +; AVX512: # %bb.0: +; AVX512-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0 +; AVX512-NEXT: vmulps %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: retq %s = fsub contract <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0> %m = fmul contract <4 x float> %s, %y ret <4 x float> %m } +define <4 x float> @test_v4f32_mul_sub_x_one_y_ninf(<4 x float> %x, <4 x float> %y) { +; FMA-LABEL: test_v4f32_mul_sub_x_one_y_ninf: +; FMA: # %bb.0: +; FMA-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm1 +; FMA-NEXT: retq +; +; FMA4-LABEL: test_v4f32_mul_sub_x_one_y_ninf: +; FMA4: # %bb.0: +; FMA4-NEXT: vfmsubps {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm1 +; FMA4-NEXT: retq +; +; AVX512-LABEL: test_v4f32_mul_sub_x_one_y_ninf: +; AVX512: # %bb.0: +; AVX512-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm1 +; AVX512-NEXT: retq + %s = fsub contract ninf <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0> + %m = fmul contract ninf <4 x float> %s, %y + ret <4 x float> %m +} + define <4 x float> @test_v4f32_mul_y_sub_x_one(<4 x float> %x, <4 x float> %y) { -; FMA-INFS-LABEL: test_v4f32_mul_y_sub_x_one: -; FMA-INFS: # %bb.0: -; FMA-INFS-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; FMA-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0 -; FMA-INFS-NEXT: retq -; -; FMA4-INFS-LABEL: test_v4f32_mul_y_sub_x_one: -; FMA4-INFS: # %bb.0: -; FMA4-INFS-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; FMA4-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0 -; FMA4-INFS-NEXT: retq -; -; AVX512-INFS-LABEL: test_v4f32_mul_y_sub_x_one: -; AVX512-INFS: # %bb.0: -; AVX512-INFS-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0 -; AVX512-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0 -; AVX512-INFS-NEXT: retq -; -; FMA-NOINFS-LABEL: test_v4f32_mul_y_sub_x_one: -; FMA-NOINFS: # %bb.0: -; FMA-NOINFS-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm1 -; FMA-NOINFS-NEXT: retq -; -; FMA4-NOINFS-LABEL: test_v4f32_mul_y_sub_x_one: -; FMA4-NOINFS: # %bb.0: -; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm1 -; FMA4-NOINFS-NEXT: retq -; -; AVX512-NOINFS-LABEL: test_v4f32_mul_y_sub_x_one: -; AVX512-NOINFS: # %bb.0: -; AVX512-NOINFS-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm1 -; AVX512-NOINFS-NEXT: retq +; FMA-LABEL: test_v4f32_mul_y_sub_x_one: +; FMA: # %bb.0: +;
FMA-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; FMA-NEXT: vmulps %xmm0, %xmm1, %xmm0 +; FMA-NEXT: retq +; +; FMA4-LABEL: test_v4f32_mul_y_sub_x_one: +; FMA4: # %bb.0: +; FMA4-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; FMA4-NEXT: vmulps %xmm0, %xmm1, %xmm0 +; FMA4-NEXT: retq +; +; AVX512-LABEL: test_v4f32_mul_y_sub_x_one: +; AVX512: # %bb.0: +; AVX512-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0 +; AVX512-NEXT: vmulps %xmm0, %xmm1, %xmm0 +; AVX512-NEXT: retq %s = fsub contract <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0> %m = fmul contract <4 x float> %y, %s ret <4 x float> %m } -define <4 x float> @test_v4f32_mul_y_sub_x_one_undefs(<4 x float> %x, <4 x float> %y) { -; FMA-INFS-LABEL: test_v4f32_mul_y_sub_x_one_undefs: -; FMA-INFS: # %bb.0: -; FMA-INFS-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; FMA-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0 -; FMA-INFS-NEXT: retq -; -; FMA4-INFS-LABEL: test_v4f32_mul_y_sub_x_one_undefs: -; FMA4-INFS: # %bb.0: -; FMA4-INFS-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; FMA4-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0 -; FMA4-INFS-NEXT: retq -; -; AVX512-INFS-LABEL: test_v4f32_mul_y_sub_x_one_undefs: -; AVX512-INFS: # %bb.0: -; AVX512-INFS-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0 -; AVX512-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0 -; AVX512-INFS-NEXT: retq -; -; FMA-NOINFS-LABEL: test_v4f32_mul_y_sub_x_one_undefs: -; FMA-NOINFS: # %bb.0: -; FMA-NOINFS-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm1 -; FMA-NOINFS-NEXT: retq -; -; FMA4-NOINFS-LABEL: test_v4f32_mul_y_sub_x_one_undefs: -; FMA4-NOINFS: # %bb.0: -; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm1 -; FMA4-NOINFS-NEXT: retq -; -; AVX512-NOINFS-LABEL: test_v4f32_mul_y_sub_x_one_undefs: -; AVX512-NOINFS: # %bb.0: -; AVX512-NOINFS-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm1 -; AVX512-NOINFS-NEXT: retq - %s = fsub contract <4 x float> %x, <float 1.0, float 1.0, float 1.0, float undef> +define <4 x float> @test_v4f32_mul_y_sub_x_one_ninf(<4 x float> %x, <4 x float> %y) { +; FMA-LABEL: test_v4f32_mul_y_sub_x_one_ninf: +; FMA: # %bb.0: +; FMA-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm1 +; FMA-NEXT: retq +; +; FMA4-LABEL: test_v4f32_mul_y_sub_x_one_ninf: +; FMA4: # %bb.0: +; FMA4-NEXT: vfmsubps {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm1 +; FMA4-NEXT: retq +; +; AVX512-LABEL: test_v4f32_mul_y_sub_x_one_ninf: +; AVX512: # %bb.0: +; AVX512-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm1 +; AVX512-NEXT: retq + %s = fsub contract ninf<4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0> + %m = fmul contract ninf<4 x float> %y, %s + ret <4 x float> %m +} + +define <4 x float> @test_v4f32_mul_y_sub_x_one_poisons(<4 x float> %x, <4 x float> %y) { +; FMA-LABEL: test_v4f32_mul_y_sub_x_one_poisons: +; FMA: # %bb.0: +; FMA-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; FMA-NEXT: vmulps %xmm0, %xmm1, %xmm0 +; FMA-NEXT: retq +; +; FMA4-LABEL: test_v4f32_mul_y_sub_x_one_poisons: +; FMA4: # %bb.0: +; FMA4-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; FMA4-NEXT: vmulps %xmm0, %xmm1, %xmm0 +; FMA4-NEXT: retq +; +; AVX512-LABEL: test_v4f32_mul_y_sub_x_one_poisons: +; AVX512: # %bb.0: +; AVX512-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0 +; AVX512-NEXT: vmulps %xmm0, %xmm1, %xmm0 +; AVX512-NEXT: retq + %s = fsub contract <4 x float> %x, <float 1.0, float 1.0, float 1.0, float poison> %m = fmul contract <4 x float> %y, %s 
ret <4 x float> %m } +define <4 x float> @test_v4f32_mul_y_sub_x_one_poisons_ninf(<4 x float> %x, <4 x float> %y) { +; FMA-LABEL: test_v4f32_mul_y_sub_x_one_poisons_ninf: +; FMA: # %bb.0: +; FMA-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm1 +; FMA-NEXT: retq +; +; FMA4-LABEL: test_v4f32_mul_y_sub_x_one_poisons_ninf: +; FMA4: # %bb.0: +; FMA4-NEXT: vfmsubps {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm1 +; FMA4-NEXT: retq +; +; AVX512-LABEL: test_v4f32_mul_y_sub_x_one_poisons_ninf: +; AVX512: # %bb.0: +; AVX512-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm1 +; AVX512-NEXT: retq + %s = fsub contract ninf<4 x float> %x, <float 1.0, float 1.0, float 1.0, float poison> + %m = fmul contract ninf<4 x float> %y, %s + ret <4 x float> %m +} + define <4 x float> @test_v4f32_mul_sub_x_negone_y(<4 x float> %x, <4 x float> %y) { -; FMA-INFS-LABEL: test_v4f32_mul_sub_x_negone_y: -; FMA-INFS: # %bb.0: -; FMA-INFS-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; FMA-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0 -; FMA-INFS-NEXT: retq -; -; FMA4-INFS-LABEL: test_v4f32_mul_sub_x_negone_y: -; FMA4-INFS: # %bb.0: -; FMA4-INFS-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; FMA4-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0 -; FMA4-INFS-NEXT: retq -; -; AVX512-INFS-LABEL: test_v4f32_mul_sub_x_negone_y: -; AVX512-INFS: # %bb.0: -; AVX512-INFS-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0 -; AVX512-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0 -; AVX512-INFS-NEXT: retq -; -; FMA-NOINFS-LABEL: test_v4f32_mul_sub_x_negone_y: -; FMA-NOINFS: # %bb.0: -; FMA-NOINFS-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm1 -; FMA-NOINFS-NEXT: retq -; -; FMA4-NOINFS-LABEL: test_v4f32_mul_sub_x_negone_y: -; FMA4-NOINFS: # %bb.0: -; FMA4-NOINFS-NEXT: vfmaddps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 -; FMA4-NOINFS-NEXT: retq -; -; AVX512-NOINFS-LABEL: test_v4f32_mul_sub_x_negone_y: -; AVX512-NOINFS: # %bb.0: -; AVX512-NOINFS-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm1 -; AVX512-NOINFS-NEXT: retq +; FMA-LABEL: test_v4f32_mul_sub_x_negone_y: +; FMA: # %bb.0: +; FMA-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; FMA-NEXT: vmulps %xmm1, %xmm0, %xmm0 +; FMA-NEXT: retq +; +; FMA4-LABEL: test_v4f32_mul_sub_x_negone_y: +; FMA4: # %bb.0: +; FMA4-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; FMA4-NEXT: vmulps %xmm1, %xmm0, %xmm0 +; FMA4-NEXT: retq +; +; AVX512-LABEL: test_v4f32_mul_sub_x_negone_y: +; AVX512: # %bb.0: +; AVX512-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0 +; AVX512-NEXT: vmulps %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: retq %s = fsub contract <4 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0> %m = fmul contract <4 x float> %s, %y ret <4 x float> %m } +define <4 x float> @test_v4f32_mul_sub_x_negone_y_ninf(<4 x float> %x, <4 x float> %y) { +; FMA-LABEL: test_v4f32_mul_sub_x_negone_y_ninf: +; FMA: # %bb.0: +; FMA-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm1 +; FMA-NEXT: retq +; +; FMA4-LABEL: test_v4f32_mul_sub_x_negone_y_ninf: +; FMA4: # %bb.0: +; FMA4-NEXT: vfmaddps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 +; FMA4-NEXT: retq +; +; AVX512-LABEL: test_v4f32_mul_sub_x_negone_y_ninf: +; AVX512: # %bb.0: +; AVX512-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm1 +; AVX512-NEXT: retq + %s = fsub contract ninf<4 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0> + %m = fmul contract ninf<4 x float> %s, %y + ret <4 x float> %m +} + define <4 x float> @test_v4f32_mul_y_sub_x_negone(<4 x float> %x, <4 x float> %y) { -; 
FMA-INFS-LABEL: test_v4f32_mul_y_sub_x_negone: -; FMA-INFS: # %bb.0: -; FMA-INFS-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; FMA-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0 -; FMA-INFS-NEXT: retq -; -; FMA4-INFS-LABEL: test_v4f32_mul_y_sub_x_negone: -; FMA4-INFS: # %bb.0: -; FMA4-INFS-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; FMA4-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0 -; FMA4-INFS-NEXT: retq -; -; AVX512-INFS-LABEL: test_v4f32_mul_y_sub_x_negone: -; AVX512-INFS: # %bb.0: -; AVX512-INFS-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0 -; AVX512-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0 -; AVX512-INFS-NEXT: retq -; -; FMA-NOINFS-LABEL: test_v4f32_mul_y_sub_x_negone: -; FMA-NOINFS: # %bb.0: -; FMA-NOINFS-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm1 -; FMA-NOINFS-NEXT: retq -; -; FMA4-NOINFS-LABEL: test_v4f32_mul_y_sub_x_negone: -; FMA4-NOINFS: # %bb.0: -; FMA4-NOINFS-NEXT: vfmaddps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 -; FMA4-NOINFS-NEXT: retq -; -; AVX512-NOINFS-LABEL: test_v4f32_mul_y_sub_x_negone: -; AVX512-NOINFS: # %bb.0: -; AVX512-NOINFS-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm1 -; AVX512-NOINFS-NEXT: retq +; FMA-LABEL: test_v4f32_mul_y_sub_x_negone: +; FMA: # %bb.0: +; FMA-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; FMA-NEXT: vmulps %xmm0, %xmm1, %xmm0 +; FMA-NEXT: retq +; +; FMA4-LABEL: test_v4f32_mul_y_sub_x_negone: +; FMA4: # %bb.0: +; FMA4-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; FMA4-NEXT: vmulps %xmm0, %xmm1, %xmm0 +; FMA4-NEXT: retq +; +; AVX512-LABEL: test_v4f32_mul_y_sub_x_negone: +; AVX512: # %bb.0: +; AVX512-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0 +; AVX512-NEXT: vmulps %xmm0, %xmm1, %xmm0 +; AVX512-NEXT: retq %s = fsub contract <4 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0> %m = fmul contract <4 x float> %y, %s ret <4 x float> %m } -define <4 x float> @test_v4f32_mul_y_sub_x_negone_undefs(<4 x float> %x, <4 x float> %y) { -; FMA-INFS-LABEL: test_v4f32_mul_y_sub_x_negone_undefs: -; FMA-INFS: # %bb.0: -; FMA-INFS-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; FMA-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0 -; FMA-INFS-NEXT: retq -; -; FMA4-INFS-LABEL: test_v4f32_mul_y_sub_x_negone_undefs: -; FMA4-INFS: # %bb.0: -; FMA4-INFS-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; FMA4-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0 -; FMA4-INFS-NEXT: retq -; -; AVX512-INFS-LABEL: test_v4f32_mul_y_sub_x_negone_undefs: -; AVX512-INFS: # %bb.0: -; AVX512-INFS-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0 -; AVX512-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0 -; AVX512-INFS-NEXT: retq -; -; FMA-NOINFS-LABEL: test_v4f32_mul_y_sub_x_negone_undefs: -; FMA-NOINFS: # %bb.0: -; FMA-NOINFS-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm1 -; FMA-NOINFS-NEXT: retq -; -; FMA4-NOINFS-LABEL: test_v4f32_mul_y_sub_x_negone_undefs: -; FMA4-NOINFS: # %bb.0: -; FMA4-NOINFS-NEXT: vfmaddps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 -; FMA4-NOINFS-NEXT: retq -; -; AVX512-NOINFS-LABEL: test_v4f32_mul_y_sub_x_negone_undefs: -; AVX512-NOINFS: # %bb.0: -; AVX512-NOINFS-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm1 -; AVX512-NOINFS-NEXT: retq - %s = fsub contract <4 x float> %x, <float undef, float -1.0, float -1.0, float -1.0> +define <4 x float> @test_v4f32_mul_y_sub_x_negone_ninf(<4 x float> %x, <4 x float> %y) { +; FMA-LABEL: test_v4f32_mul_y_sub_x_negone_ninf: +; FMA: # %bb.0: +; FMA-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm1 
+; FMA-NEXT: retq +; +; FMA4-LABEL: test_v4f32_mul_y_sub_x_negone_ninf: +; FMA4: # %bb.0: +; FMA4-NEXT: vfmaddps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 +; FMA4-NEXT: retq +; +; AVX512-LABEL: test_v4f32_mul_y_sub_x_negone_ninf: +; AVX512: # %bb.0: +; AVX512-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm1 +; AVX512-NEXT: retq + %s = fsub contract ninf<4 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0> + %m = fmul contract ninf<4 x float> %y, %s + ret <4 x float> %m +} + +define <4 x float> @test_v4f32_mul_y_sub_x_negone_poisons(<4 x float> %x, <4 x float> %y) { +; FMA-LABEL: test_v4f32_mul_y_sub_x_negone_poisons: +; FMA: # %bb.0: +; FMA-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; FMA-NEXT: vmulps %xmm0, %xmm1, %xmm0 +; FMA-NEXT: retq +; +; FMA4-LABEL: test_v4f32_mul_y_sub_x_negone_poisons: +; FMA4: # %bb.0: +; FMA4-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; FMA4-NEXT: vmulps %xmm0, %xmm1, %xmm0 +; FMA4-NEXT: retq +; +; AVX512-LABEL: test_v4f32_mul_y_sub_x_negone_poisons: +; AVX512: # %bb.0: +; AVX512-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0 +; AVX512-NEXT: vmulps %xmm0, %xmm1, %xmm0 +; AVX512-NEXT: retq + %s = fsub contract <4 x float> %x, <float poison, float -1.0, float -1.0, float -1.0> %m = fmul contract <4 x float> %y, %s ret <4 x float> %m } +define <4 x float> @test_v4f32_mul_y_sub_x_negone_poisons_ninf(<4 x float> %x, <4 x float> %y) { +; FMA-LABEL: test_v4f32_mul_y_sub_x_negone_poisons_ninf: +; FMA: # %bb.0: +; FMA-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm1 +; FMA-NEXT: retq ; -; Interpolation Patterns: add(mul(x,t),mul(sub(1.0,t),y)) +; FMA4-LABEL: test_v4f32_mul_y_sub_x_negone_poisons_ninf: +; FMA4: # %bb.0: +; FMA4-NEXT: vfmaddps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 +; FMA4-NEXT: retq ; +; AVX512-LABEL: test_v4f32_mul_y_sub_x_negone_poisons_ninf: +; AVX512: # %bb.0: +; AVX512-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm1 +; AVX512-NEXT: retq + %s = fsub contract ninf<4 x float> %x, <float poison, float -1.0, float -1.0, float -1.0> + %m = fmul contract ninf<4 x float> %y, %s + ret <4 x float> %m +} + define float @test_f32_interp(float %x, float %y, float %t) { -; FMA-INFS-LABEL: test_f32_interp: -; FMA-INFS: # %bb.0: -; FMA-INFS-NEXT: vmovss {{.*#+}} xmm3 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0] -; FMA-INFS-NEXT: vsubss %xmm2, %xmm3, %xmm3 -; FMA-INFS-NEXT: vmulss %xmm3, %xmm1, %xmm1 -; FMA-INFS-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm2 * xmm0) + xmm1 -; FMA-INFS-NEXT: retq -; -; FMA4-INFS-LABEL: test_f32_interp: -; FMA4-INFS: # %bb.0: -; FMA4-INFS-NEXT: vmovss {{.*#+}} xmm3 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0] -; FMA4-INFS-NEXT: vsubss %xmm2, %xmm3, %xmm3 -; FMA4-INFS-NEXT: vmulss %xmm3, %xmm1, %xmm1 -; FMA4-INFS-NEXT: vfmaddss {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 -; FMA4-INFS-NEXT: retq -; -; AVX512-INFS-LABEL: test_f32_interp: -; AVX512-INFS: # %bb.0: -; AVX512-INFS-NEXT: vmovss {{.*#+}} xmm3 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0] -; AVX512-INFS-NEXT: vsubss %xmm2, %xmm3, %xmm3 -; AVX512-INFS-NEXT: vmulss %xmm3, %xmm1, %xmm1 -; AVX512-INFS-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm2 * xmm0) + xmm1 -; AVX512-INFS-NEXT: retq -; -; FMA-NOINFS-LABEL: test_f32_interp: -; FMA-NOINFS: # %bb.0: -; FMA-NOINFS-NEXT: vfmsub213ss {{.*#+}} xmm1 = (xmm2 * xmm1) - xmm1 -; FMA-NOINFS-NEXT: vfmsub213ss {{.*#+}} xmm0 = (xmm2 * xmm0) - xmm1 -; FMA-NOINFS-NEXT: retq -; -; FMA4-NOINFS-LABEL: test_f32_interp: -; FMA4-NOINFS: # %bb.0: -; FMA4-NOINFS-NEXT: vfmsubss {{.*#+}} xmm1 = (xmm2 * xmm1) - xmm1 -; FMA4-NOINFS-NEXT: vfmsubss 
{{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 -; FMA4-NOINFS-NEXT: retq -; -; AVX512-NOINFS-LABEL: test_f32_interp: -; AVX512-NOINFS: # %bb.0: -; AVX512-NOINFS-NEXT: vfmsub213ss {{.*#+}} xmm1 = (xmm2 * xmm1) - xmm1 -; AVX512-NOINFS-NEXT: vfmsub213ss {{.*#+}} xmm0 = (xmm2 * xmm0) - xmm1 -; AVX512-NOINFS-NEXT: retq +; FMA-LABEL: test_f32_interp: +; FMA: # %bb.0: +; FMA-NEXT: vmovss {{.*#+}} xmm3 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0] +; FMA-NEXT: vsubss %xmm2, %xmm3, %xmm3 +; FMA-NEXT: vmulss %xmm3, %xmm1, %xmm1 +; FMA-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm2 * xmm0) + xmm1 +; FMA-NEXT: retq +; +; FMA4-LABEL: test_f32_interp: +; FMA4: # %bb.0: +; FMA4-NEXT: vmovss {{.*#+}} xmm3 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0] +; FMA4-NEXT: vsubss %xmm2, %xmm3, %xmm3 +; FMA4-NEXT: vmulss %xmm3, %xmm1, %xmm1 +; FMA4-NEXT: vfmaddss {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 +; FMA4-NEXT: retq +; +; AVX512-LABEL: test_f32_interp: +; AVX512: # %bb.0: +; AVX512-NEXT: vmovss {{.*#+}} xmm3 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0] +; AVX512-NEXT: vsubss %xmm2, %xmm3, %xmm3 +; AVX512-NEXT: vmulss %xmm3, %xmm1, %xmm1 +; AVX512-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm2 * xmm0) + xmm1 +; AVX512-NEXT: retq %t1 = fsub contract nsz float 1.0, %t %tx = fmul contract nsz float %x, %t %ty = fmul contract nsz float %y, %t1 @@ -1315,48 +1392,55 @@ define float @test_f32_interp(float %x, float %y, float %t) { ret float %r } +define float @test_f32_interp_ninf(float %x, float %y, float %t) { +; FMA-LABEL: test_f32_interp_ninf: +; FMA: # %bb.0: +; FMA-NEXT: vfmsub213ss {{.*#+}} xmm1 = (xmm2 * xmm1) - xmm1 +; FMA-NEXT: vfmsub213ss {{.*#+}} xmm0 = (xmm2 * xmm0) - xmm1 +; FMA-NEXT: retq +; +; FMA4-LABEL: test_f32_interp_ninf: +; FMA4: # %bb.0: +; FMA4-NEXT: vfmsubss {{.*#+}} xmm1 = (xmm2 * xmm1) - xmm1 +; FMA4-NEXT: vfmsubss {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 +; FMA4-NEXT: retq +; +; AVX512-LABEL: test_f32_interp_ninf: +; AVX512: # %bb.0: +; AVX512-NEXT: vfmsub213ss {{.*#+}} xmm1 = (xmm2 * xmm1) - xmm1 +; AVX512-NEXT: vfmsub213ss {{.*#+}} xmm0 = (xmm2 * xmm0) - xmm1 +; AVX512-NEXT: retq + %t1 = fsub contract ninf nsz float 1.0, %t + %tx = fmul contract ninf nsz float %x, %t + %ty = fmul contract ninf nsz float %y, %t1 + %r = fadd contract ninf nsz float %tx, %ty + ret float %r +} + define <4 x float> @test_v4f32_interp(<4 x float> %x, <4 x float> %y, <4 x float> %t) { -; FMA-INFS-LABEL: test_v4f32_interp: -; FMA-INFS: # %bb.0: -; FMA-INFS-NEXT: vbroadcastss {{.*#+}} xmm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] -; FMA-INFS-NEXT: vsubps %xmm2, %xmm3, %xmm3 -; FMA-INFS-NEXT: vmulps %xmm3, %xmm1, %xmm1 -; FMA-INFS-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm2 * xmm0) + xmm1 -; FMA-INFS-NEXT: retq -; -; FMA4-INFS-LABEL: test_v4f32_interp: -; FMA4-INFS: # %bb.0: -; FMA4-INFS-NEXT: vbroadcastss {{.*#+}} xmm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] -; FMA4-INFS-NEXT: vsubps %xmm2, %xmm3, %xmm3 -; FMA4-INFS-NEXT: vmulps %xmm3, %xmm1, %xmm1 -; FMA4-INFS-NEXT: vfmaddps {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 -; FMA4-INFS-NEXT: retq -; -; AVX512-INFS-LABEL: test_v4f32_interp: -; AVX512-INFS: # %bb.0: -; AVX512-INFS-NEXT: vbroadcastss {{.*#+}} xmm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] -; AVX512-INFS-NEXT: vsubps %xmm2, %xmm3, %xmm3 -; AVX512-INFS-NEXT: vmulps %xmm3, %xmm1, %xmm1 -; AVX512-INFS-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm2 * xmm0) + xmm1 -; AVX512-INFS-NEXT: retq -; -; FMA-NOINFS-LABEL: test_v4f32_interp: -; FMA-NOINFS: # %bb.0: -; FMA-NOINFS-NEXT: vfmsub213ps {{.*#+}} xmm1 = (xmm2 * xmm1) - xmm1 -; FMA-NOINFS-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm2 * xmm0) - xmm1 -; FMA-NOINFS-NEXT: retq -; -; 
FMA4-NOINFS-LABEL: test_v4f32_interp: -; FMA4-NOINFS: # %bb.0: -; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} xmm1 = (xmm2 * xmm1) - xmm1 -; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 -; FMA4-NOINFS-NEXT: retq -; -; AVX512-NOINFS-LABEL: test_v4f32_interp: -; AVX512-NOINFS: # %bb.0: -; AVX512-NOINFS-NEXT: vfmsub213ps {{.*#+}} xmm1 = (xmm2 * xmm1) - xmm1 -; AVX512-NOINFS-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm2 * xmm0) - xmm1 -; AVX512-NOINFS-NEXT: retq +; FMA-LABEL: test_v4f32_interp: +; FMA: # %bb.0: +; FMA-NEXT: vbroadcastss {{.*#+}} xmm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; FMA-NEXT: vsubps %xmm2, %xmm3, %xmm3 +; FMA-NEXT: vmulps %xmm3, %xmm1, %xmm1 +; FMA-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm2 * xmm0) + xmm1 +; FMA-NEXT: retq +; +; FMA4-LABEL: test_v4f32_interp: +; FMA4: # %bb.0: +; FMA4-NEXT: vbroadcastss {{.*#+}} xmm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; FMA4-NEXT: vsubps %xmm2, %xmm3, %xmm3 +; FMA4-NEXT: vmulps %xmm3, %xmm1, %xmm1 +; FMA4-NEXT: vfmaddps {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 +; FMA4-NEXT: retq +; +; AVX512-LABEL: test_v4f32_interp: +; AVX512: # %bb.0: +; AVX512-NEXT: vbroadcastss {{.*#+}} xmm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; AVX512-NEXT: vsubps %xmm2, %xmm3, %xmm3 +; AVX512-NEXT: vmulps %xmm3, %xmm1, %xmm1 +; AVX512-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm2 * xmm0) + xmm1 +; AVX512-NEXT: retq %t1 = fsub contract nsz <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %t %tx = fmul contract nsz <4 x float> %x, %t %ty = fmul contract nsz <4 x float> %y, %t1 @@ -1364,48 +1448,55 @@ define <4 x float> @test_v4f32_interp(<4 x float> %x, <4 x float> %y, <4 x float ret <4 x float> %r } +define <4 x float> @test_v4f32_interp_ninf(<4 x float> %x, <4 x float> %y, <4 x float> %t) { +; FMA-LABEL: test_v4f32_interp_ninf: +; FMA: # %bb.0: +; FMA-NEXT: vfmsub213ps {{.*#+}} xmm1 = (xmm2 * xmm1) - xmm1 +; FMA-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm2 * xmm0) - xmm1 +; FMA-NEXT: retq +; +; FMA4-LABEL: test_v4f32_interp_ninf: +; FMA4: # %bb.0: +; FMA4-NEXT: vfmsubps {{.*#+}} xmm1 = (xmm2 * xmm1) - xmm1 +; FMA4-NEXT: vfmsubps {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 +; FMA4-NEXT: retq +; +; AVX512-LABEL: test_v4f32_interp_ninf: +; AVX512: # %bb.0: +; AVX512-NEXT: vfmsub213ps {{.*#+}} xmm1 = (xmm2 * xmm1) - xmm1 +; AVX512-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm2 * xmm0) - xmm1 +; AVX512-NEXT: retq + %t1 = fsub contract ninf nsz <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %t + %tx = fmul contract ninf nsz <4 x float> %x, %t + %ty = fmul contract ninf nsz <4 x float> %y, %t1 + %r = fadd contract ninf nsz <4 x float> %tx, %ty + ret <4 x float> %r +} + define <8 x float> @test_v8f32_interp(<8 x float> %x, <8 x float> %y, <8 x float> %t) { -; FMA-INFS-LABEL: test_v8f32_interp: -; FMA-INFS: # %bb.0: -; FMA-INFS-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] -; FMA-INFS-NEXT: vsubps %ymm2, %ymm3, %ymm3 -; FMA-INFS-NEXT: vmulps %ymm3, %ymm1, %ymm1 -; FMA-INFS-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm2 * ymm0) + ymm1 -; FMA-INFS-NEXT: retq -; -; FMA4-INFS-LABEL: test_v8f32_interp: -; FMA4-INFS: # %bb.0: -; FMA4-INFS-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] -; FMA4-INFS-NEXT: vsubps %ymm2, %ymm3, %ymm3 -; FMA4-INFS-NEXT: vmulps %ymm3, %ymm1, %ymm1 -; FMA4-INFS-NEXT: vfmaddps {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 -; FMA4-INFS-NEXT: retq -; -; AVX512-INFS-LABEL: test_v8f32_interp: -; AVX512-INFS: # %bb.0: -; AVX512-INFS-NEXT: vbroadcastss {{.*#+}} ymm3 = 
[1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] -; AVX512-INFS-NEXT: vsubps %ymm2, %ymm3, %ymm3 -; AVX512-INFS-NEXT: vmulps %ymm3, %ymm1, %ymm1 -; AVX512-INFS-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm2 * ymm0) + ymm1 -; AVX512-INFS-NEXT: retq -; -; FMA-NOINFS-LABEL: test_v8f32_interp: -; FMA-NOINFS: # %bb.0: -; FMA-NOINFS-NEXT: vfmsub213ps {{.*#+}} ymm1 = (ymm2 * ymm1) - ymm1 -; FMA-NOINFS-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm2 * ymm0) - ymm1 -; FMA-NOINFS-NEXT: retq -; -; FMA4-NOINFS-LABEL: test_v8f32_interp: -; FMA4-NOINFS: # %bb.0: -; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} ymm1 = (ymm2 * ymm1) - ymm1 -; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 -; FMA4-NOINFS-NEXT: retq -; -; AVX512-NOINFS-LABEL: test_v8f32_interp: -; AVX512-NOINFS: # %bb.0: -; AVX512-NOINFS-NEXT: vfmsub213ps {{.*#+}} ymm1 = (ymm2 * ymm1) - ymm1 -; AVX512-NOINFS-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm2 * ymm0) - ymm1 -; AVX512-NOINFS-NEXT: retq +; FMA-LABEL: test_v8f32_interp: +; FMA: # %bb.0: +; FMA-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; FMA-NEXT: vsubps %ymm2, %ymm3, %ymm3 +; FMA-NEXT: vmulps %ymm3, %ymm1, %ymm1 +; FMA-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm2 * ymm0) + ymm1 +; FMA-NEXT: retq +; +; FMA4-LABEL: test_v8f32_interp: +; FMA4: # %bb.0: +; FMA4-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; FMA4-NEXT: vsubps %ymm2, %ymm3, %ymm3 +; FMA4-NEXT: vmulps %ymm3, %ymm1, %ymm1 +; FMA4-NEXT: vfmaddps {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 +; FMA4-NEXT: retq +; +; AVX512-LABEL: test_v8f32_interp: +; AVX512: # %bb.0: +; AVX512-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; AVX512-NEXT: vsubps %ymm2, %ymm3, %ymm3 +; AVX512-NEXT: vmulps %ymm3, %ymm1, %ymm1 +; AVX512-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm2 * ymm0) + ymm1 +; AVX512-NEXT: retq %t1 = fsub contract nsz <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %t %tx = fmul contract nsz <8 x float> %x, %t %ty = fmul contract nsz <8 x float> %y, %t1 @@ -1413,48 +1504,55 @@ define <8 x float> @test_v8f32_interp(<8 x float> %x, <8 x float> %y, <8 x float ret <8 x float> %r } +define <8 x float> @test_v8f32_interp_ninf(<8 x float> %x, <8 x float> %y, <8 x float> %t) { +; FMA-LABEL: test_v8f32_interp_ninf: +; FMA: # %bb.0: +; FMA-NEXT: vfmsub213ps {{.*#+}} ymm1 = (ymm2 * ymm1) - ymm1 +; FMA-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm2 * ymm0) - ymm1 +; FMA-NEXT: retq +; +; FMA4-LABEL: test_v8f32_interp_ninf: +; FMA4: # %bb.0: +; FMA4-NEXT: vfmsubps {{.*#+}} ymm1 = (ymm2 * ymm1) - ymm1 +; FMA4-NEXT: vfmsubps {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 +; FMA4-NEXT: retq +; +; AVX512-LABEL: test_v8f32_interp_ninf: +; AVX512: # %bb.0: +; AVX512-NEXT: vfmsub213ps {{.*#+}} ymm1 = (ymm2 * ymm1) - ymm1 +; AVX512-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm2 * ymm0) - ymm1 +; AVX512-NEXT: retq + %t1 = fsub contract ninf nsz <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %t + %tx = fmul contract ninf nsz <8 x float> %x, %t + %ty = fmul contract ninf nsz <8 x float> %y, %t1 + %r = fadd contract ninf nsz <8 x float> %tx, %ty + ret <8 x float> %r +} + define double @test_f64_interp(double %x, double %y, double %t) { -; FMA-INFS-LABEL: test_f64_interp: -; FMA-INFS: # %bb.0: -; FMA-INFS-NEXT: vmovsd {{.*#+}} xmm3 = [1.0E+0,0.0E+0] -; FMA-INFS-NEXT: vsubsd %xmm2, %xmm3, %xmm3 -; FMA-INFS-NEXT: vmulsd %xmm3, %xmm1, %xmm1 -; 
FMA-INFS-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm2 * xmm0) + xmm1 -; FMA-INFS-NEXT: retq -; -; FMA4-INFS-LABEL: test_f64_interp: -; FMA4-INFS: # %bb.0: -; FMA4-INFS-NEXT: vmovsd {{.*#+}} xmm3 = [1.0E+0,0.0E+0] -; FMA4-INFS-NEXT: vsubsd %xmm2, %xmm3, %xmm3 -; FMA4-INFS-NEXT: vmulsd %xmm3, %xmm1, %xmm1 -; FMA4-INFS-NEXT: vfmaddsd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 -; FMA4-INFS-NEXT: retq -; -; AVX512-INFS-LABEL: test_f64_interp: -; AVX512-INFS: # %bb.0: -; AVX512-INFS-NEXT: vmovsd {{.*#+}} xmm3 = [1.0E+0,0.0E+0] -; AVX512-INFS-NEXT: vsubsd %xmm2, %xmm3, %xmm3 -; AVX512-INFS-NEXT: vmulsd %xmm3, %xmm1, %xmm1 -; AVX512-INFS-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm2 * xmm0) + xmm1 -; AVX512-INFS-NEXT: retq -; -; FMA-NOINFS-LABEL: test_f64_interp: -; FMA-NOINFS: # %bb.0: -; FMA-NOINFS-NEXT: vfmsub213sd {{.*#+}} xmm1 = (xmm2 * xmm1) - xmm1 -; FMA-NOINFS-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm2 * xmm0) - xmm1 -; FMA-NOINFS-NEXT: retq -; -; FMA4-NOINFS-LABEL: test_f64_interp: -; FMA4-NOINFS: # %bb.0: -; FMA4-NOINFS-NEXT: vfmsubsd {{.*#+}} xmm1 = (xmm2 * xmm1) - xmm1 -; FMA4-NOINFS-NEXT: vfmsubsd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 -; FMA4-NOINFS-NEXT: retq -; -; AVX512-NOINFS-LABEL: test_f64_interp: -; AVX512-NOINFS: # %bb.0: -; AVX512-NOINFS-NEXT: vfmsub213sd {{.*#+}} xmm1 = (xmm2 * xmm1) - xmm1 -; AVX512-NOINFS-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm2 * xmm0) - xmm1 -; AVX512-NOINFS-NEXT: retq +; FMA-LABEL: test_f64_interp: +; FMA: # %bb.0: +; FMA-NEXT: vmovsd {{.*#+}} xmm3 = [1.0E+0,0.0E+0] +; FMA-NEXT: vsubsd %xmm2, %xmm3, %xmm3 +; FMA-NEXT: vmulsd %xmm3, %xmm1, %xmm1 +; FMA-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm2 * xmm0) + xmm1 +; FMA-NEXT: retq +; +; FMA4-LABEL: test_f64_interp: +; FMA4: # %bb.0: +; FMA4-NEXT: vmovsd {{.*#+}} xmm3 = [1.0E+0,0.0E+0] +; FMA4-NEXT: vsubsd %xmm2, %xmm3, %xmm3 +; FMA4-NEXT: vmulsd %xmm3, %xmm1, %xmm1 +; FMA4-NEXT: vfmaddsd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 +; FMA4-NEXT: retq +; +; AVX512-LABEL: test_f64_interp: +; AVX512: # %bb.0: +; AVX512-NEXT: vmovsd {{.*#+}} xmm3 = [1.0E+0,0.0E+0] +; AVX512-NEXT: vsubsd %xmm2, %xmm3, %xmm3 +; AVX512-NEXT: vmulsd %xmm3, %xmm1, %xmm1 +; AVX512-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm2 * xmm0) + xmm1 +; AVX512-NEXT: retq %t1 = fsub contract nsz double 1.0, %t %tx = fmul contract nsz double %x, %t %ty = fmul contract nsz double %y, %t1 @@ -1462,51 +1560,58 @@ define double @test_f64_interp(double %x, double %y, double %t) { ret double %r } +define double @test_f64_interp_ninf(double %x, double %y, double %t) { +; FMA-LABEL: test_f64_interp_ninf: +; FMA: # %bb.0: +; FMA-NEXT: vfmsub213sd {{.*#+}} xmm1 = (xmm2 * xmm1) - xmm1 +; FMA-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm2 * xmm0) - xmm1 +; FMA-NEXT: retq +; +; FMA4-LABEL: test_f64_interp_ninf: +; FMA4: # %bb.0: +; FMA4-NEXT: vfmsubsd {{.*#+}} xmm1 = (xmm2 * xmm1) - xmm1 +; FMA4-NEXT: vfmsubsd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 +; FMA4-NEXT: retq +; +; AVX512-LABEL: test_f64_interp_ninf: +; AVX512: # %bb.0: +; AVX512-NEXT: vfmsub213sd {{.*#+}} xmm1 = (xmm2 * xmm1) - xmm1 +; AVX512-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm2 * xmm0) - xmm1 +; AVX512-NEXT: retq + %t1 = fsub contract ninf nsz double 1.0, %t + %tx = fmul contract ninf nsz double %x, %t + %ty = fmul contract ninf nsz double %y, %t1 + %r = fadd contract ninf nsz double %tx, %ty + ret double %r +} + define <2 x double> @test_v2f64_interp(<2 x double> %x, <2 x double> %y, <2 x double> %t) { -; FMA-INFS-LABEL: test_v2f64_interp: -; FMA-INFS: # %bb.0: -; FMA-INFS-NEXT: vmovddup {{.*#+}} xmm3 = [1.0E+0,1.0E+0] -; FMA-INFS-NEXT: # 
xmm3 = mem[0,0] -; FMA-INFS-NEXT: vsubpd %xmm2, %xmm3, %xmm3 -; FMA-INFS-NEXT: vmulpd %xmm3, %xmm1, %xmm1 -; FMA-INFS-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm2 * xmm0) + xmm1 -; FMA-INFS-NEXT: retq -; -; FMA4-INFS-LABEL: test_v2f64_interp: -; FMA4-INFS: # %bb.0: -; FMA4-INFS-NEXT: vmovddup {{.*#+}} xmm3 = [1.0E+0,1.0E+0] -; FMA4-INFS-NEXT: # xmm3 = mem[0,0] -; FMA4-INFS-NEXT: vsubpd %xmm2, %xmm3, %xmm3 -; FMA4-INFS-NEXT: vmulpd %xmm3, %xmm1, %xmm1 -; FMA4-INFS-NEXT: vfmaddpd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 -; FMA4-INFS-NEXT: retq -; -; AVX512-INFS-LABEL: test_v2f64_interp: -; AVX512-INFS: # %bb.0: -; AVX512-INFS-NEXT: vmovddup {{.*#+}} xmm3 = [1.0E+0,1.0E+0] -; AVX512-INFS-NEXT: # xmm3 = mem[0,0] -; AVX512-INFS-NEXT: vsubpd %xmm2, %xmm3, %xmm3 -; AVX512-INFS-NEXT: vmulpd %xmm3, %xmm1, %xmm1 -; AVX512-INFS-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm2 * xmm0) + xmm1 -; AVX512-INFS-NEXT: retq -; -; FMA-NOINFS-LABEL: test_v2f64_interp: -; FMA-NOINFS: # %bb.0: -; FMA-NOINFS-NEXT: vfmsub213pd {{.*#+}} xmm1 = (xmm2 * xmm1) - xmm1 -; FMA-NOINFS-NEXT: vfmsub213pd {{.*#+}} xmm0 = (xmm2 * xmm0) - xmm1 -; FMA-NOINFS-NEXT: retq -; -; FMA4-NOINFS-LABEL: test_v2f64_interp: -; FMA4-NOINFS: # %bb.0: -; FMA4-NOINFS-NEXT: vfmsubpd {{.*#+}} xmm1 = (xmm2 * xmm1) - xmm1 -; FMA4-NOINFS-NEXT: vfmsubpd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 -; FMA4-NOINFS-NEXT: retq -; -; AVX512-NOINFS-LABEL: test_v2f64_interp: -; AVX512-NOINFS: # %bb.0: -; AVX512-NOINFS-NEXT: vfmsub213pd {{.*#+}} xmm1 = (xmm2 * xmm1) - xmm1 -; AVX512-NOINFS-NEXT: vfmsub213pd {{.*#+}} xmm0 = (xmm2 * xmm0) - xmm1 -; AVX512-NOINFS-NEXT: retq +; FMA-LABEL: test_v2f64_interp: +; FMA: # %bb.0: +; FMA-NEXT: vmovddup {{.*#+}} xmm3 = [1.0E+0,1.0E+0] +; FMA-NEXT: # xmm3 = mem[0,0] +; FMA-NEXT: vsubpd %xmm2, %xmm3, %xmm3 +; FMA-NEXT: vmulpd %xmm3, %xmm1, %xmm1 +; FMA-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm2 * xmm0) + xmm1 +; FMA-NEXT: retq +; +; FMA4-LABEL: test_v2f64_interp: +; FMA4: # %bb.0: +; FMA4-NEXT: vmovddup {{.*#+}} xmm3 = [1.0E+0,1.0E+0] +; FMA4-NEXT: # xmm3 = mem[0,0] +; FMA4-NEXT: vsubpd %xmm2, %xmm3, %xmm3 +; FMA4-NEXT: vmulpd %xmm3, %xmm1, %xmm1 +; FMA4-NEXT: vfmaddpd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 +; FMA4-NEXT: retq +; +; AVX512-LABEL: test_v2f64_interp: +; AVX512: # %bb.0: +; AVX512-NEXT: vmovddup {{.*#+}} xmm3 = [1.0E+0,1.0E+0] +; AVX512-NEXT: # xmm3 = mem[0,0] +; AVX512-NEXT: vsubpd %xmm2, %xmm3, %xmm3 +; AVX512-NEXT: vmulpd %xmm3, %xmm1, %xmm1 +; AVX512-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm2 * xmm0) + xmm1 +; AVX512-NEXT: retq %t1 = fsub contract nsz <2 x double> <double 1.0, double 1.0>, %t %tx = fmul contract nsz <2 x double> %x, %t %ty = fmul contract nsz <2 x double> %y, %t1 @@ -1514,48 +1619,55 @@ define <2 x double> @test_v2f64_interp(<2 x double> %x, <2 x double> %y, <2 x do ret <2 x double> %r } +define <2 x double> @test_v2f64_interp_ninf(<2 x double> %x, <2 x double> %y, <2 x double> %t) { +; FMA-LABEL: test_v2f64_interp_ninf: +; FMA: # %bb.0: +; FMA-NEXT: vfmsub213pd {{.*#+}} xmm1 = (xmm2 * xmm1) - xmm1 +; FMA-NEXT: vfmsub213pd {{.*#+}} xmm0 = (xmm2 * xmm0) - xmm1 +; FMA-NEXT: retq +; +; FMA4-LABEL: test_v2f64_interp_ninf: +; FMA4: # %bb.0: +; FMA4-NEXT: vfmsubpd {{.*#+}} xmm1 = (xmm2 * xmm1) - xmm1 +; FMA4-NEXT: vfmsubpd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 +; FMA4-NEXT: retq +; +; AVX512-LABEL: test_v2f64_interp_ninf: +; AVX512: # %bb.0: +; AVX512-NEXT: vfmsub213pd {{.*#+}} xmm1 = (xmm2 * xmm1) - xmm1 +; AVX512-NEXT: vfmsub213pd {{.*#+}} xmm0 = (xmm2 * xmm0) - xmm1 +; AVX512-NEXT: retq + %t1 = fsub contract ninf nsz <2 x 
double> <double 1.0, double 1.0>, %t + %tx = fmul contract ninf nsz <2 x double> %x, %t + %ty = fmul contract ninf nsz <2 x double> %y, %t1 + %r = fadd contract ninf nsz <2 x double> %tx, %ty + ret <2 x double> %r +} + define <4 x double> @test_v4f64_interp(<4 x double> %x, <4 x double> %y, <4 x double> %t) { -; FMA-INFS-LABEL: test_v4f64_interp: -; FMA-INFS: # %bb.0: -; FMA-INFS-NEXT: vbroadcastsd {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] -; FMA-INFS-NEXT: vsubpd %ymm2, %ymm3, %ymm3 -; FMA-INFS-NEXT: vmulpd %ymm3, %ymm1, %ymm1 -; FMA-INFS-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm2 * ymm0) + ymm1 -; FMA-INFS-NEXT: retq -; -; FMA4-INFS-LABEL: test_v4f64_interp: -; FMA4-INFS: # %bb.0: -; FMA4-INFS-NEXT: vbroadcastsd {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] -; FMA4-INFS-NEXT: vsubpd %ymm2, %ymm3, %ymm3 -; FMA4-INFS-NEXT: vmulpd %ymm3, %ymm1, %ymm1 -; FMA4-INFS-NEXT: vfmaddpd {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 -; FMA4-INFS-NEXT: retq -; -; AVX512-INFS-LABEL: test_v4f64_interp: -; AVX512-INFS: # %bb.0: -; AVX512-INFS-NEXT: vbroadcastsd {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] -; AVX512-INFS-NEXT: vsubpd %ymm2, %ymm3, %ymm3 -; AVX512-INFS-NEXT: vmulpd %ymm3, %ymm1, %ymm1 -; AVX512-INFS-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm2 * ymm0) + ymm1 -; AVX512-INFS-NEXT: retq -; -; FMA-NOINFS-LABEL: test_v4f64_interp: -; FMA-NOINFS: # %bb.0: -; FMA-NOINFS-NEXT: vfmsub213pd {{.*#+}} ymm1 = (ymm2 * ymm1) - ymm1 -; FMA-NOINFS-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm2 * ymm0) - ymm1 -; FMA-NOINFS-NEXT: retq -; -; FMA4-NOINFS-LABEL: test_v4f64_interp: -; FMA4-NOINFS: # %bb.0: -; FMA4-NOINFS-NEXT: vfmsubpd {{.*#+}} ymm1 = (ymm2 * ymm1) - ymm1 -; FMA4-NOINFS-NEXT: vfmsubpd {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 -; FMA4-NOINFS-NEXT: retq -; -; AVX512-NOINFS-LABEL: test_v4f64_interp: -; AVX512-NOINFS: # %bb.0: -; AVX512-NOINFS-NEXT: vfmsub213pd {{.*#+}} ymm1 = (ymm2 * ymm1) - ymm1 -; AVX512-NOINFS-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm2 * ymm0) - ymm1 -; AVX512-NOINFS-NEXT: retq +; FMA-LABEL: test_v4f64_interp: +; FMA: # %bb.0: +; FMA-NEXT: vbroadcastsd {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; FMA-NEXT: vsubpd %ymm2, %ymm3, %ymm3 +; FMA-NEXT: vmulpd %ymm3, %ymm1, %ymm1 +; FMA-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm2 * ymm0) + ymm1 +; FMA-NEXT: retq +; +; FMA4-LABEL: test_v4f64_interp: +; FMA4: # %bb.0: +; FMA4-NEXT: vbroadcastsd {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; FMA4-NEXT: vsubpd %ymm2, %ymm3, %ymm3 +; FMA4-NEXT: vmulpd %ymm3, %ymm1, %ymm1 +; FMA4-NEXT: vfmaddpd {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 +; FMA4-NEXT: retq +; +; AVX512-LABEL: test_v4f64_interp: +; AVX512: # %bb.0: +; AVX512-NEXT: vbroadcastsd {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; AVX512-NEXT: vsubpd %ymm2, %ymm3, %ymm3 +; AVX512-NEXT: vmulpd %ymm3, %ymm1, %ymm1 +; AVX512-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm2 * ymm0) + ymm1 +; AVX512-NEXT: retq %t1 = fsub contract nsz <4 x double> <double 1.0, double 1.0, double 1.0, double 1.0>, %t %tx = fmul contract nsz <4 x double> %x, %t %ty = fmul contract nsz <4 x double> %y, %t1 @@ -1563,6 +1675,31 @@ define <4 x double> @test_v4f64_interp(<4 x double> %x, <4 x double> %y, <4 x do ret <4 x double> %r } +define <4 x double> @test_v4f64_interp_ninf(<4 x double> %x, <4 x double> %y, <4 x double> %t) { +; FMA-LABEL: test_v4f64_interp_ninf: +; FMA: # %bb.0: +; FMA-NEXT: vfmsub213pd {{.*#+}} ymm1 = (ymm2 * ymm1) - ymm1 +; FMA-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm2 * ymm0) - ymm1 +; FMA-NEXT: retq +; +; FMA4-LABEL: test_v4f64_interp_ninf: +; FMA4: # %bb.0: +; FMA4-NEXT: 
vfmsubpd {{.*#+}} ymm1 = (ymm2 * ymm1) - ymm1 +; FMA4-NEXT: vfmsubpd {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 +; FMA4-NEXT: retq +; +; AVX512-LABEL: test_v4f64_interp_ninf: +; AVX512: # %bb.0: +; AVX512-NEXT: vfmsub213pd {{.*#+}} ymm1 = (ymm2 * ymm1) - ymm1 +; AVX512-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm2 * ymm0) - ymm1 +; AVX512-NEXT: retq + %t1 = fsub contract ninf nsz <4 x double> <double 1.0, double 1.0, double 1.0, double 1.0>, %t + %tx = fmul contract ninf nsz <4 x double> %x, %t + %ty = fmul contract ninf nsz <4 x double> %y, %t1 + %r = fadd contract ninf nsz <4 x double> %tx, %ty + ret <4 x double> %r +} + ; ; Pattern: (fneg (fma x, y, z)) -> (fma x, -y, -z) ; @@ -2022,3 +2159,7 @@ define float @fadd_fma_fmul_extra_use_3(float %a, float %b, float %c, float %d, %a2 = fadd contract fast float %n0, %a1 ret float %a2 } +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; AVX512-INFS: {{.*}} +; FMA-INFS: {{.*}} +; FMA4-INFS: {{.*}} diff --git a/llvm/test/CodeGen/X86/fma_patterns_wide.ll b/llvm/test/CodeGen/X86/fma_patterns_wide.ll index f0af394..4c16cf9 100644 --- a/llvm/test/CodeGen/X86/fma_patterns_wide.ll +++ b/llvm/test/CodeGen/X86/fma_patterns_wide.ll @@ -3,10 +3,6 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4,+fma | FileCheck %s --check-prefix=FMA4 --check-prefix=FMA4-INFS ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4 | FileCheck %s --check-prefix=FMA4 --check-prefix=FMA4-INFS ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512-INFS -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma -enable-no-infs-fp-math | FileCheck %s --check-prefix=FMA --check-prefix=FMA-NOINFS -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4,+fma -enable-no-infs-fp-math | FileCheck %s --check-prefix=FMA4 --check-prefix=FMA4-NOINFS -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4 -enable-no-infs-fp-math | FileCheck %s --check-prefix=FMA4 --check-prefix=FMA4-NOINFS -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq -enable-no-infs-fp-math | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512-NOINFS ; ; Pattern: (fadd (fmul x, y), z) -> (fmadd x,y,z) @@ -257,617 +253,668 @@ define <8 x double> @test_8f64_fmsub_load(ptr %a0, <8 x double> %a1, <8 x double ; define <16 x float> @test_v16f32_mul_add_x_one_y(<16 x float> %x, <16 x float> %y) { -; FMA-INFS-LABEL: test_v16f32_mul_add_x_one_y: -; FMA-INFS: # %bb.0: -; FMA-INFS-NEXT: vbroadcastss {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] -; FMA-INFS-NEXT: vaddps %ymm4, %ymm1, %ymm1 -; FMA-INFS-NEXT: vaddps %ymm4, %ymm0, %ymm0 -; FMA-INFS-NEXT: vmulps %ymm2, %ymm0, %ymm0 -; FMA-INFS-NEXT: vmulps %ymm3, %ymm1, %ymm1 -; FMA-INFS-NEXT: retq -; -; FMA4-INFS-LABEL: test_v16f32_mul_add_x_one_y: -; FMA4-INFS: # %bb.0: -; FMA4-INFS-NEXT: vbroadcastss {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] -; FMA4-INFS-NEXT: vaddps %ymm4, %ymm1, %ymm1 -; FMA4-INFS-NEXT: vaddps %ymm4, %ymm0, %ymm0 -; FMA4-INFS-NEXT: vmulps %ymm2, %ymm0, %ymm0 -; FMA4-INFS-NEXT: vmulps %ymm3, %ymm1, %ymm1 -; FMA4-INFS-NEXT: retq -; -; AVX512-INFS-LABEL: test_v16f32_mul_add_x_one_y: -; AVX512-INFS: # %bb.0: -; AVX512-INFS-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0 -; AVX512-INFS-NEXT: vmulps %zmm1, %zmm0, %zmm0 -; AVX512-INFS-NEXT: retq -; -; FMA-NOINFS-LABEL: test_v16f32_mul_add_x_one_y: -; 
FMA-NOINFS: # %bb.0: -; FMA-NOINFS-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm2 * ymm0) + ymm2 -; FMA-NOINFS-NEXT: vfmadd213ps {{.*#+}} ymm1 = (ymm3 * ymm1) + ymm3 -; FMA-NOINFS-NEXT: retq -; -; FMA4-NOINFS-LABEL: test_v16f32_mul_add_x_one_y: -; FMA4-NOINFS: # %bb.0: -; FMA4-NOINFS-NEXT: vfmaddps {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm2 -; FMA4-NOINFS-NEXT: vfmaddps {{.*#+}} ymm1 = (ymm1 * ymm3) + ymm3 -; FMA4-NOINFS-NEXT: retq -; -; AVX512-NOINFS-LABEL: test_v16f32_mul_add_x_one_y: -; AVX512-NOINFS: # %bb.0: -; AVX512-NOINFS-NEXT: vfmadd213ps {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm1 -; AVX512-NOINFS-NEXT: retq +; FMA-LABEL: test_v16f32_mul_add_x_one_y: +; FMA: # %bb.0: +; FMA-NEXT: vbroadcastss {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; FMA-NEXT: vaddps %ymm4, %ymm1, %ymm1 +; FMA-NEXT: vaddps %ymm4, %ymm0, %ymm0 +; FMA-NEXT: vmulps %ymm2, %ymm0, %ymm0 +; FMA-NEXT: vmulps %ymm3, %ymm1, %ymm1 +; FMA-NEXT: retq +; +; FMA4-LABEL: test_v16f32_mul_add_x_one_y: +; FMA4: # %bb.0: +; FMA4-NEXT: vbroadcastss {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; FMA4-NEXT: vaddps %ymm4, %ymm1, %ymm1 +; FMA4-NEXT: vaddps %ymm4, %ymm0, %ymm0 +; FMA4-NEXT: vmulps %ymm2, %ymm0, %ymm0 +; FMA4-NEXT: vmulps %ymm3, %ymm1, %ymm1 +; FMA4-NEXT: retq +; +; AVX512-LABEL: test_v16f32_mul_add_x_one_y: +; AVX512: # %bb.0: +; AVX512-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0 +; AVX512-NEXT: vmulps %zmm1, %zmm0, %zmm0 +; AVX512-NEXT: retq %a = fadd contract <16 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0> %m = fmul contract <16 x float> %a, %y ret <16 x float> %m } +define <16 x float> @test_v16f32_mul_add_x_one_y_ninf(<16 x float> %x, <16 x float> %y) { +; FMA-LABEL: test_v16f32_mul_add_x_one_y_ninf: +; FMA: # %bb.0: +; FMA-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm2 * ymm0) + ymm2 +; FMA-NEXT: vfmadd213ps {{.*#+}} ymm1 = (ymm3 * ymm1) + ymm3 +; FMA-NEXT: retq +; +; FMA4-LABEL: test_v16f32_mul_add_x_one_y_ninf: +; FMA4: # %bb.0: +; FMA4-NEXT: vfmaddps {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm2 +; FMA4-NEXT: vfmaddps {{.*#+}} ymm1 = (ymm1 * ymm3) + ymm3 +; FMA4-NEXT: retq +; +; AVX512-LABEL: test_v16f32_mul_add_x_one_y_ninf: +; AVX512: # %bb.0: +; AVX512-NEXT: vfmadd213ps {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm1 +; AVX512-NEXT: retq + %a = fadd contract ninf <16 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0> + %m = fmul contract ninf <16 x float> %a, %y + ret <16 x float> %m +} + define <8 x double> @test_v8f64_mul_y_add_x_one(<8 x double> %x, <8 x double> %y) { -; FMA-INFS-LABEL: test_v8f64_mul_y_add_x_one: -; FMA-INFS: # %bb.0: -; FMA-INFS-NEXT: vbroadcastsd {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] -; FMA-INFS-NEXT: vaddpd %ymm4, %ymm1, %ymm1 -; FMA-INFS-NEXT: vaddpd %ymm4, %ymm0, %ymm0 -; FMA-INFS-NEXT: vmulpd %ymm0, %ymm2, %ymm0 -; FMA-INFS-NEXT: vmulpd %ymm1, %ymm3, %ymm1 -; FMA-INFS-NEXT: retq -; -; FMA4-INFS-LABEL: test_v8f64_mul_y_add_x_one: -; FMA4-INFS: # %bb.0: -; FMA4-INFS-NEXT: vbroadcastsd {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] -; FMA4-INFS-NEXT: vaddpd %ymm4, %ymm1, %ymm1 -; FMA4-INFS-NEXT: vaddpd %ymm4, %ymm0, %ymm0 -; FMA4-INFS-NEXT: vmulpd %ymm0, %ymm2, %ymm0 -; FMA4-INFS-NEXT: vmulpd %ymm1, %ymm3, %ymm1 -; FMA4-INFS-NEXT: retq -; -; AVX512-INFS-LABEL: 
test_v8f64_mul_y_add_x_one: -; AVX512-INFS: # %bb.0: -; AVX512-INFS-NEXT: vaddpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0 -; AVX512-INFS-NEXT: vmulpd %zmm0, %zmm1, %zmm0 -; AVX512-INFS-NEXT: retq -; -; FMA-NOINFS-LABEL: test_v8f64_mul_y_add_x_one: -; FMA-NOINFS: # %bb.0: -; FMA-NOINFS-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm2 * ymm0) + ymm2 -; FMA-NOINFS-NEXT: vfmadd213pd {{.*#+}} ymm1 = (ymm3 * ymm1) + ymm3 -; FMA-NOINFS-NEXT: retq -; -; FMA4-NOINFS-LABEL: test_v8f64_mul_y_add_x_one: -; FMA4-NOINFS: # %bb.0: -; FMA4-NOINFS-NEXT: vfmaddpd {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm2 -; FMA4-NOINFS-NEXT: vfmaddpd {{.*#+}} ymm1 = (ymm1 * ymm3) + ymm3 -; FMA4-NOINFS-NEXT: retq -; -; AVX512-NOINFS-LABEL: test_v8f64_mul_y_add_x_one: -; AVX512-NOINFS: # %bb.0: -; AVX512-NOINFS-NEXT: vfmadd213pd {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm1 -; AVX512-NOINFS-NEXT: retq +; FMA-LABEL: test_v8f64_mul_y_add_x_one: +; FMA: # %bb.0: +; FMA-NEXT: vbroadcastsd {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; FMA-NEXT: vaddpd %ymm4, %ymm1, %ymm1 +; FMA-NEXT: vaddpd %ymm4, %ymm0, %ymm0 +; FMA-NEXT: vmulpd %ymm0, %ymm2, %ymm0 +; FMA-NEXT: vmulpd %ymm1, %ymm3, %ymm1 +; FMA-NEXT: retq +; +; FMA4-LABEL: test_v8f64_mul_y_add_x_one: +; FMA4: # %bb.0: +; FMA4-NEXT: vbroadcastsd {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; FMA4-NEXT: vaddpd %ymm4, %ymm1, %ymm1 +; FMA4-NEXT: vaddpd %ymm4, %ymm0, %ymm0 +; FMA4-NEXT: vmulpd %ymm0, %ymm2, %ymm0 +; FMA4-NEXT: vmulpd %ymm1, %ymm3, %ymm1 +; FMA4-NEXT: retq +; +; AVX512-LABEL: test_v8f64_mul_y_add_x_one: +; AVX512: # %bb.0: +; AVX512-NEXT: vaddpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0 +; AVX512-NEXT: vmulpd %zmm0, %zmm1, %zmm0 +; AVX512-NEXT: retq %a = fadd contract <8 x double> %x, <double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0> %m = fmul contract <8 x double> %y, %a ret <8 x double> %m } +define <8 x double> @test_v8f64_mul_y_add_x_one_ninf(<8 x double> %x, <8 x double> %y) { +; FMA-LABEL: test_v8f64_mul_y_add_x_one_ninf: +; FMA: # %bb.0: +; FMA-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm2 * ymm0) + ymm2 +; FMA-NEXT: vfmadd213pd {{.*#+}} ymm1 = (ymm3 * ymm1) + ymm3 +; FMA-NEXT: retq +; +; FMA4-LABEL: test_v8f64_mul_y_add_x_one_ninf: +; FMA4: # %bb.0: +; FMA4-NEXT: vfmaddpd {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm2 +; FMA4-NEXT: vfmaddpd {{.*#+}} ymm1 = (ymm1 * ymm3) + ymm3 +; FMA4-NEXT: retq +; +; AVX512-LABEL: test_v8f64_mul_y_add_x_one_ninf: +; AVX512: # %bb.0: +; AVX512-NEXT: vfmadd213pd {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm1 +; AVX512-NEXT: retq + %a = fadd contract ninf <8 x double> %x, <double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0> + %m = fmul contract ninf <8 x double> %y, %a + ret <8 x double> %m +} + define <16 x float> @test_v16f32_mul_add_x_negone_y(<16 x float> %x, <16 x float> %y) { -; FMA-INFS-LABEL: test_v16f32_mul_add_x_negone_y: -; FMA-INFS: # %bb.0: -; FMA-INFS-NEXT: vbroadcastss {{.*#+}} ymm4 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0] -; FMA-INFS-NEXT: vaddps %ymm4, %ymm1, %ymm1 -; FMA-INFS-NEXT: vaddps %ymm4, %ymm0, %ymm0 -; FMA-INFS-NEXT: vmulps %ymm2, %ymm0, %ymm0 -; FMA-INFS-NEXT: vmulps %ymm3, %ymm1, %ymm1 -; FMA-INFS-NEXT: retq -; -; FMA4-INFS-LABEL: test_v16f32_mul_add_x_negone_y: -; FMA4-INFS: # %bb.0: -; FMA4-INFS-NEXT: vbroadcastss {{.*#+}} ymm4 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0] -; FMA4-INFS-NEXT: vaddps %ymm4, %ymm1, %ymm1 -; FMA4-INFS-NEXT: vaddps %ymm4, %ymm0, %ymm0 -; FMA4-INFS-NEXT: 
vmulps %ymm2, %ymm0, %ymm0 -; FMA4-INFS-NEXT: vmulps %ymm3, %ymm1, %ymm1 -; FMA4-INFS-NEXT: retq -; -; AVX512-INFS-LABEL: test_v16f32_mul_add_x_negone_y: -; AVX512-INFS: # %bb.0: -; AVX512-INFS-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0 -; AVX512-INFS-NEXT: vmulps %zmm1, %zmm0, %zmm0 -; AVX512-INFS-NEXT: retq -; -; FMA-NOINFS-LABEL: test_v16f32_mul_add_x_negone_y: -; FMA-NOINFS: # %bb.0: -; FMA-NOINFS-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm2 * ymm0) - ymm2 -; FMA-NOINFS-NEXT: vfmsub213ps {{.*#+}} ymm1 = (ymm3 * ymm1) - ymm3 -; FMA-NOINFS-NEXT: retq -; -; FMA4-NOINFS-LABEL: test_v16f32_mul_add_x_negone_y: -; FMA4-NOINFS: # %bb.0: -; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm2 -; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} ymm1 = (ymm1 * ymm3) - ymm3 -; FMA4-NOINFS-NEXT: retq -; -; AVX512-NOINFS-LABEL: test_v16f32_mul_add_x_negone_y: -; AVX512-NOINFS: # %bb.0: -; AVX512-NOINFS-NEXT: vfmsub213ps {{.*#+}} zmm0 = (zmm1 * zmm0) - zmm1 -; AVX512-NOINFS-NEXT: retq +; FMA-LABEL: test_v16f32_mul_add_x_negone_y: +; FMA: # %bb.0: +; FMA-NEXT: vbroadcastss {{.*#+}} ymm4 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0] +; FMA-NEXT: vaddps %ymm4, %ymm1, %ymm1 +; FMA-NEXT: vaddps %ymm4, %ymm0, %ymm0 +; FMA-NEXT: vmulps %ymm2, %ymm0, %ymm0 +; FMA-NEXT: vmulps %ymm3, %ymm1, %ymm1 +; FMA-NEXT: retq +; +; FMA4-LABEL: test_v16f32_mul_add_x_negone_y: +; FMA4: # %bb.0: +; FMA4-NEXT: vbroadcastss {{.*#+}} ymm4 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0] +; FMA4-NEXT: vaddps %ymm4, %ymm1, %ymm1 +; FMA4-NEXT: vaddps %ymm4, %ymm0, %ymm0 +; FMA4-NEXT: vmulps %ymm2, %ymm0, %ymm0 +; FMA4-NEXT: vmulps %ymm3, %ymm1, %ymm1 +; FMA4-NEXT: retq +; +; AVX512-LABEL: test_v16f32_mul_add_x_negone_y: +; AVX512: # %bb.0: +; AVX512-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0 +; AVX512-NEXT: vmulps %zmm1, %zmm0, %zmm0 +; AVX512-NEXT: retq %a = fadd contract <16 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0> %m = fmul contract <16 x float> %a, %y ret <16 x float> %m } +define <16 x float> @test_v16f32_mul_add_x_negone_y_ninf(<16 x float> %x, <16 x float> %y) { +; FMA-LABEL: test_v16f32_mul_add_x_negone_y_ninf: +; FMA: # %bb.0: +; FMA-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm2 * ymm0) - ymm2 +; FMA-NEXT: vfmsub213ps {{.*#+}} ymm1 = (ymm3 * ymm1) - ymm3 +; FMA-NEXT: retq +; +; FMA4-LABEL: test_v16f32_mul_add_x_negone_y_ninf: +; FMA4: # %bb.0: +; FMA4-NEXT: vfmsubps {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm2 +; FMA4-NEXT: vfmsubps {{.*#+}} ymm1 = (ymm1 * ymm3) - ymm3 +; FMA4-NEXT: retq +; +; AVX512-LABEL: test_v16f32_mul_add_x_negone_y_ninf: +; AVX512: # %bb.0: +; AVX512-NEXT: vfmsub213ps {{.*#+}} zmm0 = (zmm1 * zmm0) - zmm1 +; AVX512-NEXT: retq + %a = fadd contract ninf <16 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0> + %m = fmul contract ninf <16 x float> %a, %y + ret <16 x float> %m +} + define <8 x double> @test_v8f64_mul_y_add_x_negone(<8 x double> %x, <8 x double> %y) { -; FMA-INFS-LABEL: test_v8f64_mul_y_add_x_negone: -; FMA-INFS: # %bb.0: -; FMA-INFS-NEXT: vbroadcastsd {{.*#+}} ymm4 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0] -; FMA-INFS-NEXT: vaddpd %ymm4, %ymm1, %ymm1 -; FMA-INFS-NEXT: vaddpd %ymm4, %ymm0, %ymm0 -; FMA-INFS-NEXT: 
vmulpd %ymm0, %ymm2, %ymm0 -; FMA-INFS-NEXT: vmulpd %ymm1, %ymm3, %ymm1 -; FMA-INFS-NEXT: retq -; -; FMA4-INFS-LABEL: test_v8f64_mul_y_add_x_negone: -; FMA4-INFS: # %bb.0: -; FMA4-INFS-NEXT: vbroadcastsd {{.*#+}} ymm4 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0] -; FMA4-INFS-NEXT: vaddpd %ymm4, %ymm1, %ymm1 -; FMA4-INFS-NEXT: vaddpd %ymm4, %ymm0, %ymm0 -; FMA4-INFS-NEXT: vmulpd %ymm0, %ymm2, %ymm0 -; FMA4-INFS-NEXT: vmulpd %ymm1, %ymm3, %ymm1 -; FMA4-INFS-NEXT: retq -; -; AVX512-INFS-LABEL: test_v8f64_mul_y_add_x_negone: -; AVX512-INFS: # %bb.0: -; AVX512-INFS-NEXT: vaddpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0 -; AVX512-INFS-NEXT: vmulpd %zmm0, %zmm1, %zmm0 -; AVX512-INFS-NEXT: retq -; -; FMA-NOINFS-LABEL: test_v8f64_mul_y_add_x_negone: -; FMA-NOINFS: # %bb.0: -; FMA-NOINFS-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm2 * ymm0) - ymm2 -; FMA-NOINFS-NEXT: vfmsub213pd {{.*#+}} ymm1 = (ymm3 * ymm1) - ymm3 -; FMA-NOINFS-NEXT: retq -; -; FMA4-NOINFS-LABEL: test_v8f64_mul_y_add_x_negone: -; FMA4-NOINFS: # %bb.0: -; FMA4-NOINFS-NEXT: vfmsubpd {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm2 -; FMA4-NOINFS-NEXT: vfmsubpd {{.*#+}} ymm1 = (ymm1 * ymm3) - ymm3 -; FMA4-NOINFS-NEXT: retq -; -; AVX512-NOINFS-LABEL: test_v8f64_mul_y_add_x_negone: -; AVX512-NOINFS: # %bb.0: -; AVX512-NOINFS-NEXT: vfmsub213pd {{.*#+}} zmm0 = (zmm1 * zmm0) - zmm1 -; AVX512-NOINFS-NEXT: retq +; FMA-LABEL: test_v8f64_mul_y_add_x_negone: +; FMA: # %bb.0: +; FMA-NEXT: vbroadcastsd {{.*#+}} ymm4 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0] +; FMA-NEXT: vaddpd %ymm4, %ymm1, %ymm1 +; FMA-NEXT: vaddpd %ymm4, %ymm0, %ymm0 +; FMA-NEXT: vmulpd %ymm0, %ymm2, %ymm0 +; FMA-NEXT: vmulpd %ymm1, %ymm3, %ymm1 +; FMA-NEXT: retq +; +; FMA4-LABEL: test_v8f64_mul_y_add_x_negone: +; FMA4: # %bb.0: +; FMA4-NEXT: vbroadcastsd {{.*#+}} ymm4 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0] +; FMA4-NEXT: vaddpd %ymm4, %ymm1, %ymm1 +; FMA4-NEXT: vaddpd %ymm4, %ymm0, %ymm0 +; FMA4-NEXT: vmulpd %ymm0, %ymm2, %ymm0 +; FMA4-NEXT: vmulpd %ymm1, %ymm3, %ymm1 +; FMA4-NEXT: retq +; +; AVX512-LABEL: test_v8f64_mul_y_add_x_negone: +; AVX512: # %bb.0: +; AVX512-NEXT: vaddpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0 +; AVX512-NEXT: vmulpd %zmm0, %zmm1, %zmm0 +; AVX512-NEXT: retq %a = fadd contract <8 x double> %x, <double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0> %m = fmul contract <8 x double> %y, %a ret <8 x double> %m } +define <8 x double> @test_v8f64_mul_y_add_x_negone_ninf(<8 x double> %x, <8 x double> %y) { +; FMA-LABEL: test_v8f64_mul_y_add_x_negone_ninf: +; FMA: # %bb.0: +; FMA-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm2 * ymm0) - ymm2 +; FMA-NEXT: vfmsub213pd {{.*#+}} ymm1 = (ymm3 * ymm1) - ymm3 +; FMA-NEXT: retq +; +; FMA4-LABEL: test_v8f64_mul_y_add_x_negone_ninf: +; FMA4: # %bb.0: +; FMA4-NEXT: vfmsubpd {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm2 +; FMA4-NEXT: vfmsubpd {{.*#+}} ymm1 = (ymm1 * ymm3) - ymm3 +; FMA4-NEXT: retq +; +; AVX512-LABEL: test_v8f64_mul_y_add_x_negone_ninf: +; AVX512: # %bb.0: +; AVX512-NEXT: vfmsub213pd {{.*#+}} zmm0 = (zmm1 * zmm0) - zmm1 +; AVX512-NEXT: retq + %a = fadd contract ninf <8 x double> %x, <double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0> + %m = fmul contract ninf <8 x double> %y, %a + ret <8 x double> %m +} + define <16 x float> @test_v16f32_mul_sub_one_x_y(<16 x float> %x, <16 x float> %y) { -; FMA-INFS-LABEL: test_v16f32_mul_sub_one_x_y: -; FMA-INFS: # %bb.0: -; FMA-INFS-NEXT: vbroadcastss {{.*#+}} ymm4 = 
[1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] -; FMA-INFS-NEXT: vsubps %ymm1, %ymm4, %ymm1 -; FMA-INFS-NEXT: vsubps %ymm0, %ymm4, %ymm0 -; FMA-INFS-NEXT: vmulps %ymm2, %ymm0, %ymm0 -; FMA-INFS-NEXT: vmulps %ymm3, %ymm1, %ymm1 -; FMA-INFS-NEXT: retq -; -; FMA4-INFS-LABEL: test_v16f32_mul_sub_one_x_y: -; FMA4-INFS: # %bb.0: -; FMA4-INFS-NEXT: vbroadcastss {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] -; FMA4-INFS-NEXT: vsubps %ymm1, %ymm4, %ymm1 -; FMA4-INFS-NEXT: vsubps %ymm0, %ymm4, %ymm0 -; FMA4-INFS-NEXT: vmulps %ymm2, %ymm0, %ymm0 -; FMA4-INFS-NEXT: vmulps %ymm3, %ymm1, %ymm1 -; FMA4-INFS-NEXT: retq -; -; AVX512-INFS-LABEL: test_v16f32_mul_sub_one_x_y: -; AVX512-INFS: # %bb.0: -; AVX512-INFS-NEXT: vbroadcastss {{.*#+}} zmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] -; AVX512-INFS-NEXT: vsubps %zmm0, %zmm2, %zmm0 -; AVX512-INFS-NEXT: vmulps %zmm1, %zmm0, %zmm0 -; AVX512-INFS-NEXT: retq -; -; FMA-NOINFS-LABEL: test_v16f32_mul_sub_one_x_y: -; FMA-NOINFS: # %bb.0: -; FMA-NOINFS-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm2 * ymm0) + ymm2 -; FMA-NOINFS-NEXT: vfnmadd213ps {{.*#+}} ymm1 = -(ymm3 * ymm1) + ymm3 -; FMA-NOINFS-NEXT: retq -; -; FMA4-NOINFS-LABEL: test_v16f32_mul_sub_one_x_y: -; FMA4-NOINFS: # %bb.0: -; FMA4-NOINFS-NEXT: vfnmaddps {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm2 -; FMA4-NOINFS-NEXT: vfnmaddps {{.*#+}} ymm1 = -(ymm1 * ymm3) + ymm3 -; FMA4-NOINFS-NEXT: retq -; -; AVX512-NOINFS-LABEL: test_v16f32_mul_sub_one_x_y: -; AVX512-NOINFS: # %bb.0: -; AVX512-NOINFS-NEXT: vfnmadd213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) + zmm1 -; AVX512-NOINFS-NEXT: retq +; FMA-LABEL: test_v16f32_mul_sub_one_x_y: +; FMA: # %bb.0: +; FMA-NEXT: vbroadcastss {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; FMA-NEXT: vsubps %ymm1, %ymm4, %ymm1 +; FMA-NEXT: vsubps %ymm0, %ymm4, %ymm0 +; FMA-NEXT: vmulps %ymm2, %ymm0, %ymm0 +; FMA-NEXT: vmulps %ymm3, %ymm1, %ymm1 +; FMA-NEXT: retq +; +; FMA4-LABEL: test_v16f32_mul_sub_one_x_y: +; FMA4: # %bb.0: +; FMA4-NEXT: vbroadcastss {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; FMA4-NEXT: vsubps %ymm1, %ymm4, %ymm1 +; FMA4-NEXT: vsubps %ymm0, %ymm4, %ymm0 +; FMA4-NEXT: vmulps %ymm2, %ymm0, %ymm0 +; FMA4-NEXT: vmulps %ymm3, %ymm1, %ymm1 +; FMA4-NEXT: retq +; +; AVX512-LABEL: test_v16f32_mul_sub_one_x_y: +; AVX512: # %bb.0: +; AVX512-NEXT: vbroadcastss {{.*#+}} zmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; AVX512-NEXT: vsubps %zmm0, %zmm2, %zmm0 +; AVX512-NEXT: vmulps %zmm1, %zmm0, %zmm0 +; AVX512-NEXT: retq %s = fsub contract <16 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %x %m = fmul contract <16 x float> %s, %y ret <16 x float> %m } +define <16 x float> @test_v16f32_mul_sub_one_x_y_ninf(<16 x float> %x, <16 x float> %y) { +; FMA-LABEL: test_v16f32_mul_sub_one_x_y_ninf: +; FMA: # %bb.0: +; FMA-NEXT: vbroadcastss {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; FMA-NEXT: vsubps %ymm1, %ymm4, %ymm1 +; FMA-NEXT: vsubps %ymm0, %ymm4, %ymm0 +; FMA-NEXT: vmulps %ymm2, %ymm0, %ymm0 +; FMA-NEXT: vmulps %ymm3, %ymm1, %ymm1 +; FMA-NEXT: retq +; +; FMA4-LABEL: test_v16f32_mul_sub_one_x_y_ninf: +; FMA4: # %bb.0: +; FMA4-NEXT: vbroadcastss {{.*#+}} ymm4 = 
[1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; FMA4-NEXT: vsubps %ymm1, %ymm4, %ymm1
+; FMA4-NEXT: vsubps %ymm0, %ymm4, %ymm0
+; FMA4-NEXT: vmulps %ymm2, %ymm0, %ymm0
+; FMA4-NEXT: vmulps %ymm3, %ymm1, %ymm1
+; FMA4-NEXT: retq
+;
+; AVX512-LABEL: test_v16f32_mul_sub_one_x_y_ninf:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vbroadcastss {{.*#+}} zmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; AVX512-NEXT: vsubps %zmm0, %zmm2, %zmm0
+; AVX512-NEXT: vmulps %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: retq
+ %s = fsub contract ninf <16 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %x
+ %m = fmul contract ninf <16 x float> %s, %y
+ ret <16 x float> %m
+}
+
 define <8 x double> @test_v8f64_mul_y_sub_one_x(<8 x double> %x, <8 x double> %y) {
-; FMA-INFS-LABEL: test_v8f64_mul_y_sub_one_x:
-; FMA-INFS: # %bb.0:
-; FMA-INFS-NEXT: vbroadcastsd {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
-; FMA-INFS-NEXT: vsubpd %ymm1, %ymm4, %ymm1
-; FMA-INFS-NEXT: vsubpd %ymm0, %ymm4, %ymm0
-; FMA-INFS-NEXT: vmulpd %ymm0, %ymm2, %ymm0
-; FMA-INFS-NEXT: vmulpd %ymm1, %ymm3, %ymm1
-; FMA-INFS-NEXT: retq
-;
-; FMA4-INFS-LABEL: test_v8f64_mul_y_sub_one_x:
-; FMA4-INFS: # %bb.0:
-; FMA4-INFS-NEXT: vbroadcastsd {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
-; FMA4-INFS-NEXT: vsubpd %ymm1, %ymm4, %ymm1
-; FMA4-INFS-NEXT: vsubpd %ymm0, %ymm4, %ymm0
-; FMA4-INFS-NEXT: vmulpd %ymm0, %ymm2, %ymm0
-; FMA4-INFS-NEXT: vmulpd %ymm1, %ymm3, %ymm1
-; FMA4-INFS-NEXT: retq
-;
-; AVX512-INFS-LABEL: test_v8f64_mul_y_sub_one_x:
-; AVX512-INFS: # %bb.0:
-; AVX512-INFS-NEXT: vbroadcastsd {{.*#+}} zmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
-; AVX512-INFS-NEXT: vsubpd %zmm0, %zmm2, %zmm0
-; AVX512-INFS-NEXT: vmulpd %zmm0, %zmm1, %zmm0
-; AVX512-INFS-NEXT: retq
-;
-; FMA-NOINFS-LABEL: test_v8f64_mul_y_sub_one_x:
-; FMA-NOINFS: # %bb.0:
-; FMA-NOINFS-NEXT: vfnmadd213pd {{.*#+}} ymm0 = -(ymm2 * ymm0) + ymm2
-; FMA-NOINFS-NEXT: vfnmadd213pd {{.*#+}} ymm1 = -(ymm3 * ymm1) + ymm3
-; FMA-NOINFS-NEXT: retq
-;
-; FMA4-NOINFS-LABEL: test_v8f64_mul_y_sub_one_x:
-; FMA4-NOINFS: # %bb.0:
-; FMA4-NOINFS-NEXT: vfnmaddpd {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm2
-; FMA4-NOINFS-NEXT: vfnmaddpd {{.*#+}} ymm1 = -(ymm1 * ymm3) + ymm3
-; FMA4-NOINFS-NEXT: retq
-;
-; AVX512-NOINFS-LABEL: test_v8f64_mul_y_sub_one_x:
-; AVX512-NOINFS: # %bb.0:
-; AVX512-NOINFS-NEXT: vfnmadd213pd {{.*#+}} zmm0 = -(zmm1 * zmm0) + zmm1
-; AVX512-NOINFS-NEXT: retq
+; FMA-LABEL: test_v8f64_mul_y_sub_one_x:
+; FMA: # %bb.0:
+; FMA-NEXT: vbroadcastsd {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; FMA-NEXT: vsubpd %ymm1, %ymm4, %ymm1
+; FMA-NEXT: vsubpd %ymm0, %ymm4, %ymm0
+; FMA-NEXT: vmulpd %ymm0, %ymm2, %ymm0
+; FMA-NEXT: vmulpd %ymm1, %ymm3, %ymm1
+; FMA-NEXT: retq
+;
+; FMA4-LABEL: test_v8f64_mul_y_sub_one_x:
+; FMA4: # %bb.0:
+; FMA4-NEXT: vbroadcastsd {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; FMA4-NEXT: vsubpd %ymm1, %ymm4, %ymm1
+; FMA4-NEXT: vsubpd %ymm0, %ymm4, %ymm0
+; FMA4-NEXT: vmulpd %ymm0, %ymm2, %ymm0
+; FMA4-NEXT: vmulpd %ymm1, %ymm3, %ymm1
+; FMA4-NEXT: retq
+;
+; AVX512-LABEL: test_v8f64_mul_y_sub_one_x:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vbroadcastsd {{.*#+}} zmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; AVX512-NEXT: vsubpd %zmm0, %zmm2, %zmm0
+; AVX512-NEXT: vmulpd %zmm0, %zmm1, %zmm0
+; AVX512-NEXT: retq
 %s = fsub contract <8 x double> <double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0>, %x
 %m = fmul contract <8 x double> %y, %s
 ret <8 x double> %m
 }
+define <8 x double> @test_v8f64_mul_y_sub_one_x_ninf(<8 x double> %x, <8 x double> %y) {
+; FMA-LABEL: test_v8f64_mul_y_sub_one_x_ninf:
+; FMA: # %bb.0:
+; FMA-NEXT: vfnmadd213pd {{.*#+}} ymm0 = -(ymm2 * ymm0) + ymm2
+; FMA-NEXT: vfnmadd213pd {{.*#+}} ymm1 = -(ymm3 * ymm1) + ymm3
+; FMA-NEXT: retq
+;
+; FMA4-LABEL: test_v8f64_mul_y_sub_one_x_ninf:
+; FMA4: # %bb.0:
+; FMA4-NEXT: vfnmaddpd {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm2
+; FMA4-NEXT: vfnmaddpd {{.*#+}} ymm1 = -(ymm1 * ymm3) + ymm3
+; FMA4-NEXT: retq
+;
+; AVX512-LABEL: test_v8f64_mul_y_sub_one_x_ninf:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vfnmadd213pd {{.*#+}} zmm0 = -(zmm1 * zmm0) + zmm1
+; AVX512-NEXT: retq
+ %s = fsub contract ninf <8 x double> <double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0>, %x
+ %m = fmul contract ninf <8 x double> %y, %s
+ ret <8 x double> %m
+}
+
 define <16 x float> @test_v16f32_mul_sub_negone_x_y(<16 x float> %x, <16 x float> %y) {
-; FMA-INFS-LABEL: test_v16f32_mul_sub_negone_x_y:
-; FMA-INFS: # %bb.0:
-; FMA-INFS-NEXT: vbroadcastss {{.*#+}} ymm4 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
-; FMA-INFS-NEXT: vsubps %ymm1, %ymm4, %ymm1
-; FMA-INFS-NEXT: vsubps %ymm0, %ymm4, %ymm0
-; FMA-INFS-NEXT: vmulps %ymm2, %ymm0, %ymm0
-; FMA-INFS-NEXT: vmulps %ymm3, %ymm1, %ymm1
-; FMA-INFS-NEXT: retq
-;
-; FMA4-INFS-LABEL: test_v16f32_mul_sub_negone_x_y:
-; FMA4-INFS: # %bb.0:
-; FMA4-INFS-NEXT: vbroadcastss {{.*#+}} ymm4 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
-; FMA4-INFS-NEXT: vsubps %ymm1, %ymm4, %ymm1
-; FMA4-INFS-NEXT: vsubps %ymm0, %ymm4, %ymm0
-; FMA4-INFS-NEXT: vmulps %ymm2, %ymm0, %ymm0
-; FMA4-INFS-NEXT: vmulps %ymm3, %ymm1, %ymm1
-; FMA4-INFS-NEXT: retq
-;
-; AVX512-INFS-LABEL: test_v16f32_mul_sub_negone_x_y:
-; AVX512-INFS: # %bb.0:
-; AVX512-INFS-NEXT: vbroadcastss {{.*#+}} zmm2 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
-; AVX512-INFS-NEXT: vsubps %zmm0, %zmm2, %zmm0
-; AVX512-INFS-NEXT: vmulps %zmm1, %zmm0, %zmm0
-; AVX512-INFS-NEXT: retq
-;
-; FMA-NOINFS-LABEL: test_v16f32_mul_sub_negone_x_y:
-; FMA-NOINFS: # %bb.0:
-; FMA-NOINFS-NEXT: vfnmsub213ps {{.*#+}} ymm0 = -(ymm2 * ymm0) - ymm2
-; FMA-NOINFS-NEXT: vfnmsub213ps {{.*#+}} ymm1 = -(ymm3 * ymm1) - ymm3
-; FMA-NOINFS-NEXT: retq
-;
-; FMA4-NOINFS-LABEL: test_v16f32_mul_sub_negone_x_y:
-; FMA4-NOINFS: # %bb.0:
-; FMA4-NOINFS-NEXT: vfnmsubps {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm2
-; FMA4-NOINFS-NEXT: vfnmsubps {{.*#+}} ymm1 = -(ymm1 * ymm3) - ymm3
-; FMA4-NOINFS-NEXT: retq
-;
-; AVX512-NOINFS-LABEL: test_v16f32_mul_sub_negone_x_y:
-; AVX512-NOINFS: # %bb.0:
-; AVX512-NOINFS-NEXT: vfnmsub213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) - zmm1
-; AVX512-NOINFS-NEXT: retq
+; FMA-LABEL: test_v16f32_mul_sub_negone_x_y:
+; FMA: # %bb.0:
+; FMA-NEXT: vbroadcastss {{.*#+}} ymm4 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
+; FMA-NEXT: vsubps %ymm1, %ymm4, %ymm1
+; FMA-NEXT: vsubps %ymm0, %ymm4, %ymm0
+; FMA-NEXT: vmulps %ymm2, %ymm0, %ymm0
+; FMA-NEXT: vmulps %ymm3, %ymm1, %ymm1
+; FMA-NEXT: retq
+;
+; FMA4-LABEL: test_v16f32_mul_sub_negone_x_y:
+; FMA4: # %bb.0:
+; FMA4-NEXT: vbroadcastss {{.*#+}} ymm4 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
+; FMA4-NEXT: vsubps %ymm1, %ymm4, %ymm1
+; FMA4-NEXT: vsubps %ymm0, %ymm4, %ymm0
+; FMA4-NEXT: vmulps %ymm2, %ymm0, %ymm0
+; FMA4-NEXT: vmulps %ymm3, %ymm1, %ymm1
+; FMA4-NEXT: retq
+;
+; AVX512-LABEL: test_v16f32_mul_sub_negone_x_y:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vbroadcastss {{.*#+}} zmm2 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
+; AVX512-NEXT: vsubps %zmm0, %zmm2, %zmm0
+; AVX512-NEXT: vmulps %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: retq
 %s = fsub contract <16 x float> <float -1.0, float -1.0, float -1.0, float -1.0,float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0>, %x
 %m = fmul contract <16 x float> %s, %y
 ret <16 x float> %m
 }
+define <16 x float> @test_v16f32_mul_sub_negone_x_y_ninf(<16 x float> %x, <16 x float> %y) {
+; FMA-LABEL: test_v16f32_mul_sub_negone_x_y_ninf:
+; FMA: # %bb.0:
+; FMA-NEXT: vbroadcastss {{.*#+}} ymm4 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
+; FMA-NEXT: vsubps %ymm1, %ymm4, %ymm1
+; FMA-NEXT: vsubps %ymm0, %ymm4, %ymm0
+; FMA-NEXT: vmulps %ymm2, %ymm0, %ymm0
+; FMA-NEXT: vmulps %ymm3, %ymm1, %ymm1
+; FMA-NEXT: retq
+;
+; FMA4-LABEL: test_v16f32_mul_sub_negone_x_y_ninf:
+; FMA4: # %bb.0:
+; FMA4-NEXT: vbroadcastss {{.*#+}} ymm4 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
+; FMA4-NEXT: vsubps %ymm1, %ymm4, %ymm1
+; FMA4-NEXT: vsubps %ymm0, %ymm4, %ymm0
+; FMA4-NEXT: vmulps %ymm2, %ymm0, %ymm0
+; FMA4-NEXT: vmulps %ymm3, %ymm1, %ymm1
+; FMA4-NEXT: retq
+;
+; AVX512-LABEL: test_v16f32_mul_sub_negone_x_y_ninf:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vbroadcastss {{.*#+}} zmm2 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
+; AVX512-NEXT: vsubps %zmm0, %zmm2, %zmm0
+; AVX512-NEXT: vmulps %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: retq
+ %s = fsub contract ninf <16 x float> <float -1.0, float -1.0, float -1.0, float -1.0,float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0>, %x
+ %m = fmul contract ninf <16 x float> %s, %y
+ ret <16 x float> %m
+}
+
 define <8 x double> @test_v8f64_mul_y_sub_negone_x(<8 x double> %x, <8 x double> %y) {
-; FMA-INFS-LABEL: test_v8f64_mul_y_sub_negone_x:
-; FMA-INFS: # %bb.0:
-; FMA-INFS-NEXT: vbroadcastsd {{.*#+}} ymm4 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
-; FMA-INFS-NEXT: vsubpd %ymm1, %ymm4, %ymm1
-; FMA-INFS-NEXT: vsubpd %ymm0, %ymm4, %ymm0
-; FMA-INFS-NEXT: vmulpd %ymm0, %ymm2, %ymm0
-; FMA-INFS-NEXT: vmulpd %ymm1, %ymm3, %ymm1
-; FMA-INFS-NEXT: retq
-;
-; FMA4-INFS-LABEL: test_v8f64_mul_y_sub_negone_x:
-; FMA4-INFS: # %bb.0:
-; FMA4-INFS-NEXT: vbroadcastsd {{.*#+}} ymm4 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
-; FMA4-INFS-NEXT: vsubpd %ymm1, %ymm4, %ymm1
-; FMA4-INFS-NEXT: vsubpd %ymm0, %ymm4, %ymm0
-; FMA4-INFS-NEXT: vmulpd %ymm0, %ymm2, %ymm0
-; FMA4-INFS-NEXT: vmulpd %ymm1, %ymm3, %ymm1
-; FMA4-INFS-NEXT: retq
-;
-; AVX512-INFS-LABEL: test_v8f64_mul_y_sub_negone_x:
-; AVX512-INFS: # %bb.0:
-; AVX512-INFS-NEXT: vbroadcastsd {{.*#+}} zmm2 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
-; AVX512-INFS-NEXT: vsubpd %zmm0, %zmm2, %zmm0
-; AVX512-INFS-NEXT: vmulpd %zmm0, %zmm1, %zmm0
-; AVX512-INFS-NEXT: retq
-;
-; FMA-NOINFS-LABEL: test_v8f64_mul_y_sub_negone_x:
-; FMA-NOINFS: # %bb.0:
-; FMA-NOINFS-NEXT: vfnmsub213pd {{.*#+}} ymm0 = -(ymm2 * ymm0) - ymm2
-; FMA-NOINFS-NEXT: vfnmsub213pd {{.*#+}} ymm1 = -(ymm3 * ymm1) - ymm3
-; FMA-NOINFS-NEXT: retq
-;
-; FMA4-NOINFS-LABEL: test_v8f64_mul_y_sub_negone_x:
-; FMA4-NOINFS: # %bb.0:
-; FMA4-NOINFS-NEXT: vfnmsubpd {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm2
-; FMA4-NOINFS-NEXT: vfnmsubpd {{.*#+}} ymm1 = -(ymm1 * ymm3) - ymm3
-; FMA4-NOINFS-NEXT: retq
-;
-; AVX512-NOINFS-LABEL: test_v8f64_mul_y_sub_negone_x:
-; AVX512-NOINFS: # %bb.0:
-; AVX512-NOINFS-NEXT: vfnmsub213pd {{.*#+}} zmm0 = -(zmm1 * zmm0) - zmm1
-; AVX512-NOINFS-NEXT: retq
+; FMA-LABEL: test_v8f64_mul_y_sub_negone_x:
+; FMA: # %bb.0:
+; FMA-NEXT: vbroadcastsd {{.*#+}} ymm4 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
+; FMA-NEXT: vsubpd %ymm1, %ymm4, %ymm1
+; FMA-NEXT: vsubpd %ymm0, %ymm4, %ymm0
+; FMA-NEXT: vmulpd %ymm0, %ymm2, %ymm0
+; FMA-NEXT: vmulpd %ymm1, %ymm3, %ymm1
+; FMA-NEXT: retq
+;
+; FMA4-LABEL: test_v8f64_mul_y_sub_negone_x:
+; FMA4: # %bb.0:
+; FMA4-NEXT: vbroadcastsd {{.*#+}} ymm4 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
+; FMA4-NEXT: vsubpd %ymm1, %ymm4, %ymm1
+; FMA4-NEXT: vsubpd %ymm0, %ymm4, %ymm0
+; FMA4-NEXT: vmulpd %ymm0, %ymm2, %ymm0
+; FMA4-NEXT: vmulpd %ymm1, %ymm3, %ymm1
+; FMA4-NEXT: retq
+;
+; AVX512-LABEL: test_v8f64_mul_y_sub_negone_x:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vbroadcastsd {{.*#+}} zmm2 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
+; AVX512-NEXT: vsubpd %zmm0, %zmm2, %zmm0
+; AVX512-NEXT: vmulpd %zmm0, %zmm1, %zmm0
+; AVX512-NEXT: retq
 %s = fsub contract <8 x double> <double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0>, %x
 %m = fmul contract <8 x double> %y, %s
 ret <8 x double> %m
 }
+define <8 x double> @test_v8f64_mul_y_sub_negone_x_ninf(<8 x double> %x, <8 x double> %y) {
+; FMA-LABEL: test_v8f64_mul_y_sub_negone_x_ninf:
+; FMA: # %bb.0:
+; FMA-NEXT: vfnmsub213pd {{.*#+}} ymm0 = -(ymm2 * ymm0) - ymm2
+; FMA-NEXT: vfnmsub213pd {{.*#+}} ymm1 = -(ymm3 * ymm1) - ymm3
+; FMA-NEXT: retq
+;
+; FMA4-LABEL: test_v8f64_mul_y_sub_negone_x_ninf:
+; FMA4: # %bb.0:
+; FMA4-NEXT: vfnmsubpd {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm2
+; FMA4-NEXT: vfnmsubpd {{.*#+}} ymm1 = -(ymm1 * ymm3) - ymm3
+; FMA4-NEXT: retq
+;
+; AVX512-LABEL: test_v8f64_mul_y_sub_negone_x_ninf:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vfnmsub213pd {{.*#+}} zmm0 = -(zmm1 * zmm0) - zmm1
+; AVX512-NEXT: retq
+ %s = fsub contract ninf <8 x double> <double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0>, %x
+ %m = fmul contract ninf <8 x double> %y, %s
+ ret <8 x double> %m
+}
+
 define <16 x float> @test_v16f32_mul_sub_x_one_y(<16 x float> %x, <16 x float> %y) {
-; FMA-INFS-LABEL: test_v16f32_mul_sub_x_one_y:
-; FMA-INFS: # %bb.0:
-; FMA-INFS-NEXT: vbroadcastss {{.*#+}} ymm4 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
-; FMA-INFS-NEXT: vaddps %ymm4, %ymm1, %ymm1
-; FMA-INFS-NEXT: vaddps %ymm4, %ymm0, %ymm0
-; FMA-INFS-NEXT: vmulps %ymm2, %ymm0, %ymm0
-; FMA-INFS-NEXT: vmulps %ymm3, %ymm1, %ymm1
-; FMA-INFS-NEXT: retq
-;
-; FMA4-INFS-LABEL: test_v16f32_mul_sub_x_one_y:
-; FMA4-INFS: # %bb.0:
-; FMA4-INFS-NEXT: vbroadcastss {{.*#+}} ymm4 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
-; FMA4-INFS-NEXT: vaddps %ymm4, %ymm1, %ymm1
-; FMA4-INFS-NEXT: vaddps %ymm4, %ymm0, %ymm0
-; FMA4-INFS-NEXT: vmulps %ymm2, %ymm0, %ymm0
-; FMA4-INFS-NEXT: vmulps %ymm3, %ymm1, %ymm1
-; FMA4-INFS-NEXT: retq
-;
-; AVX512-INFS-LABEL: test_v16f32_mul_sub_x_one_y:
-; AVX512-INFS: # %bb.0:
-; AVX512-INFS-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
-; AVX512-INFS-NEXT: vmulps %zmm1, %zmm0, %zmm0
-; AVX512-INFS-NEXT: retq
-;
-; FMA-NOINFS-LABEL: test_v16f32_mul_sub_x_one_y:
-; FMA-NOINFS: # %bb.0:
-; FMA-NOINFS-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm2 * ymm0) - ymm2
-; FMA-NOINFS-NEXT: vfmsub213ps {{.*#+}} ymm1 = (ymm3 * ymm1) - ymm3
-; FMA-NOINFS-NEXT: retq
-;
-; FMA4-NOINFS-LABEL: test_v16f32_mul_sub_x_one_y:
-; FMA4-NOINFS: # %bb.0:
-; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm2
-; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} ymm1 = (ymm1 * ymm3) - ymm3
-; FMA4-NOINFS-NEXT: retq
-;
-; AVX512-NOINFS-LABEL: test_v16f32_mul_sub_x_one_y:
-; AVX512-NOINFS: # %bb.0:
-; AVX512-NOINFS-NEXT: vfmsub213ps {{.*#+}} zmm0 = (zmm1 * zmm0) - zmm1
-; AVX512-NOINFS-NEXT: retq
+; FMA-LABEL: test_v16f32_mul_sub_x_one_y:
+; FMA: # %bb.0:
+; FMA-NEXT: vbroadcastss {{.*#+}} ymm4 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
+; FMA-NEXT: vaddps %ymm4, %ymm1, %ymm1
+; FMA-NEXT: vaddps %ymm4, %ymm0, %ymm0
+; FMA-NEXT: vmulps %ymm2, %ymm0, %ymm0
+; FMA-NEXT: vmulps %ymm3, %ymm1, %ymm1
+; FMA-NEXT: retq
+;
+; FMA4-LABEL: test_v16f32_mul_sub_x_one_y:
+; FMA4: # %bb.0:
+; FMA4-NEXT: vbroadcastss {{.*#+}} ymm4 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
+; FMA4-NEXT: vaddps %ymm4, %ymm1, %ymm1
+; FMA4-NEXT: vaddps %ymm4, %ymm0, %ymm0
+; FMA4-NEXT: vmulps %ymm2, %ymm0, %ymm0
+; FMA4-NEXT: vmulps %ymm3, %ymm1, %ymm1
+; FMA4-NEXT: retq
+;
+; AVX512-LABEL: test_v16f32_mul_sub_x_one_y:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
+; AVX512-NEXT: vmulps %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: retq
 %s = fsub contract <16 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>
 %m = fmul contract <16 x float> %s, %y
 ret <16 x float> %m
 }
+define <16 x float> @test_v16f32_mul_sub_x_one_y_ninf(<16 x float> %x, <16 x float> %y) {
+; FMA-LABEL: test_v16f32_mul_sub_x_one_y_ninf:
+; FMA: # %bb.0:
+; FMA-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm2 * ymm0) - ymm2
+; FMA-NEXT: vfmsub213ps {{.*#+}} ymm1 = (ymm3 * ymm1) - ymm3
+; FMA-NEXT: retq
+;
+; FMA4-LABEL: test_v16f32_mul_sub_x_one_y_ninf:
+; FMA4: # %bb.0:
+; FMA4-NEXT: vfmsubps {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm2
+; FMA4-NEXT: vfmsubps {{.*#+}} ymm1 = (ymm1 * ymm3) - ymm3
+; FMA4-NEXT: retq
+;
+; AVX512-LABEL: test_v16f32_mul_sub_x_one_y_ninf:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vfmsub213ps {{.*#+}} zmm0 = (zmm1 * zmm0) - zmm1
+; AVX512-NEXT: retq
+ %s = fsub contract ninf <16 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>
+ %m = fmul contract ninf <16 x float> %s, %y
+ ret <16 x float> %m
+}
+
 define <8 x double> @test_v8f64_mul_y_sub_x_one(<8 x double> %x, <8 x double> %y) {
-; FMA-INFS-LABEL: test_v8f64_mul_y_sub_x_one:
-; FMA-INFS: # %bb.0:
-; FMA-INFS-NEXT: vbroadcastsd {{.*#+}} ymm4 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
-; FMA-INFS-NEXT: vaddpd %ymm4, %ymm1, %ymm1
-; FMA-INFS-NEXT: vaddpd %ymm4, %ymm0, %ymm0
-; FMA-INFS-NEXT: vmulpd %ymm0, %ymm2, %ymm0
-; FMA-INFS-NEXT: vmulpd %ymm1, %ymm3, %ymm1
-; FMA-INFS-NEXT: retq
-;
-; FMA4-INFS-LABEL: test_v8f64_mul_y_sub_x_one:
-; FMA4-INFS: # %bb.0:
-; FMA4-INFS-NEXT: vbroadcastsd {{.*#+}} ymm4 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
-; FMA4-INFS-NEXT: vaddpd %ymm4, %ymm1, %ymm1
-; FMA4-INFS-NEXT: vaddpd %ymm4, %ymm0, %ymm0
-; FMA4-INFS-NEXT: vmulpd %ymm0, %ymm2, %ymm0
-; FMA4-INFS-NEXT: vmulpd %ymm1, %ymm3, %ymm1
-; FMA4-INFS-NEXT: retq
-;
-; AVX512-INFS-LABEL: test_v8f64_mul_y_sub_x_one:
-; AVX512-INFS: # %bb.0:
-; AVX512-INFS-NEXT: vaddpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
-; AVX512-INFS-NEXT: vmulpd %zmm0, %zmm1, %zmm0
-; AVX512-INFS-NEXT: retq
-;
-; FMA-NOINFS-LABEL: test_v8f64_mul_y_sub_x_one:
-; FMA-NOINFS: # %bb.0:
-; FMA-NOINFS-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm2 * ymm0) - ymm2
-; FMA-NOINFS-NEXT: vfmsub213pd {{.*#+}} ymm1 = (ymm3 * ymm1) - ymm3
-; FMA-NOINFS-NEXT: retq
-;
-; FMA4-NOINFS-LABEL: test_v8f64_mul_y_sub_x_one:
-; FMA4-NOINFS: # %bb.0:
-; FMA4-NOINFS-NEXT: vfmsubpd {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm2
-; FMA4-NOINFS-NEXT: vfmsubpd {{.*#+}} ymm1 = (ymm1 * ymm3) - ymm3
-; FMA4-NOINFS-NEXT: retq
-;
-; AVX512-NOINFS-LABEL: test_v8f64_mul_y_sub_x_one:
-; AVX512-NOINFS: # %bb.0:
-; AVX512-NOINFS-NEXT: vfmsub213pd {{.*#+}} zmm0 = (zmm1 * zmm0) - zmm1
-; AVX512-NOINFS-NEXT: retq
+; FMA-LABEL: test_v8f64_mul_y_sub_x_one:
+; FMA: # %bb.0:
+; FMA-NEXT: vbroadcastsd {{.*#+}} ymm4 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
+; FMA-NEXT: vaddpd %ymm4, %ymm1, %ymm1
+; FMA-NEXT: vaddpd %ymm4, %ymm0, %ymm0
+; FMA-NEXT: vmulpd %ymm0, %ymm2, %ymm0
+; FMA-NEXT: vmulpd %ymm1, %ymm3, %ymm1
+; FMA-NEXT: retq
+;
+; FMA4-LABEL: test_v8f64_mul_y_sub_x_one:
+; FMA4: # %bb.0:
+; FMA4-NEXT: vbroadcastsd {{.*#+}} ymm4 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
+; FMA4-NEXT: vaddpd %ymm4, %ymm1, %ymm1
+; FMA4-NEXT: vaddpd %ymm4, %ymm0, %ymm0
+; FMA4-NEXT: vmulpd %ymm0, %ymm2, %ymm0
+; FMA4-NEXT: vmulpd %ymm1, %ymm3, %ymm1
+; FMA4-NEXT: retq
+;
+; AVX512-LABEL: test_v8f64_mul_y_sub_x_one:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vaddpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
+; AVX512-NEXT: vmulpd %zmm0, %zmm1, %zmm0
+; AVX512-NEXT: retq
 %s = fsub contract <8 x double> %x, <double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0>
 %m = fmul contract <8 x double> %y, %s
 ret <8 x double> %m
 }
+define <8 x double> @test_v8f64_mul_y_sub_x_one_ninf(<8 x double> %x, <8 x double> %y) {
+; FMA-LABEL: test_v8f64_mul_y_sub_x_one_ninf:
+; FMA: # %bb.0:
+; FMA-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm2 * ymm0) - ymm2
+; FMA-NEXT: vfmsub213pd {{.*#+}} ymm1 = (ymm3 * ymm1) - ymm3
+; FMA-NEXT: retq
+;
+; FMA4-LABEL: test_v8f64_mul_y_sub_x_one_ninf:
+; FMA4: # %bb.0:
+; FMA4-NEXT: vfmsubpd {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm2
+; FMA4-NEXT: vfmsubpd {{.*#+}} ymm1 = (ymm1 * ymm3) - ymm3
+; FMA4-NEXT: retq
+;
+; AVX512-LABEL: test_v8f64_mul_y_sub_x_one_ninf:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vfmsub213pd {{.*#+}} zmm0 = (zmm1 * zmm0) - zmm1
+; AVX512-NEXT: retq
+ %s = fsub contract ninf <8 x double> %x, <double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0>
+ %m = fmul contract ninf <8 x double> %y, %s
+ ret <8 x double> %m
+}
+
 define <16 x float> @test_v16f32_mul_sub_x_negone_y(<16 x float> %x, <16 x float> %y) {
-; FMA-INFS-LABEL: test_v16f32_mul_sub_x_negone_y:
-; FMA-INFS: # %bb.0:
-; FMA-INFS-NEXT: vbroadcastss {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
-; FMA-INFS-NEXT: vaddps %ymm4, %ymm1, %ymm1
-; FMA-INFS-NEXT: vaddps %ymm4, %ymm0, %ymm0
-; FMA-INFS-NEXT: vmulps %ymm2, %ymm0, %ymm0
-; FMA-INFS-NEXT: vmulps %ymm3, %ymm1, %ymm1
-; FMA-INFS-NEXT: retq
-;
-; FMA4-INFS-LABEL: test_v16f32_mul_sub_x_negone_y:
-; FMA4-INFS: # %bb.0:
-; FMA4-INFS-NEXT: vbroadcastss {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
-; FMA4-INFS-NEXT: vaddps %ymm4, %ymm1, %ymm1
-; FMA4-INFS-NEXT: vaddps %ymm4, %ymm0, %ymm0
-; FMA4-INFS-NEXT: vmulps %ymm2, %ymm0, %ymm0
-; FMA4-INFS-NEXT: vmulps %ymm3, %ymm1, %ymm1
-; FMA4-INFS-NEXT: retq
-;
-; AVX512-INFS-LABEL: test_v16f32_mul_sub_x_negone_y:
-; AVX512-INFS: # %bb.0:
-; AVX512-INFS-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
-; AVX512-INFS-NEXT: vmulps %zmm1, %zmm0, %zmm0
-; AVX512-INFS-NEXT: retq
-;
-; FMA-NOINFS-LABEL: test_v16f32_mul_sub_x_negone_y:
-; FMA-NOINFS: # %bb.0:
-; FMA-NOINFS-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm2 * ymm0) + ymm2
-; FMA-NOINFS-NEXT: vfmadd213ps {{.*#+}} ymm1 = (ymm3 * ymm1) + ymm3
-; FMA-NOINFS-NEXT: retq
-;
-; FMA4-NOINFS-LABEL: test_v16f32_mul_sub_x_negone_y:
-; FMA4-NOINFS: # %bb.0:
-; FMA4-NOINFS-NEXT: vfmaddps {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm2
-; FMA4-NOINFS-NEXT: vfmaddps {{.*#+}} ymm1 = (ymm1 * ymm3) + ymm3
-; FMA4-NOINFS-NEXT: retq
-;
-; AVX512-NOINFS-LABEL: test_v16f32_mul_sub_x_negone_y:
-; AVX512-NOINFS: # %bb.0:
-; AVX512-NOINFS-NEXT: vfmadd213ps {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm1
-; AVX512-NOINFS-NEXT: retq
+; FMA-LABEL: test_v16f32_mul_sub_x_negone_y:
+; FMA: # %bb.0:
+; FMA-NEXT: vbroadcastss {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; FMA-NEXT: vaddps %ymm4, %ymm1, %ymm1
+; FMA-NEXT: vaddps %ymm4, %ymm0, %ymm0
+; FMA-NEXT: vmulps %ymm2, %ymm0, %ymm0
+; FMA-NEXT: vmulps %ymm3, %ymm1, %ymm1
+; FMA-NEXT: retq
+;
+; FMA4-LABEL: test_v16f32_mul_sub_x_negone_y:
+; FMA4: # %bb.0:
+; FMA4-NEXT: vbroadcastss {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; FMA4-NEXT: vaddps %ymm4, %ymm1, %ymm1
+; FMA4-NEXT: vaddps %ymm4, %ymm0, %ymm0
+; FMA4-NEXT: vmulps %ymm2, %ymm0, %ymm0
+; FMA4-NEXT: vmulps %ymm3, %ymm1, %ymm1
+; FMA4-NEXT: retq
+;
+; AVX512-LABEL: test_v16f32_mul_sub_x_negone_y:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
+; AVX512-NEXT: vmulps %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: retq
 %s = fsub contract <16 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0>
 %m = fmul contract <16 x float> %s, %y
 ret <16 x float> %m
 }
+define <16 x float> @test_v16f32_mul_sub_x_negone_y_ninf(<16 x float> %x, <16 x float> %y) {
+; FMA-LABEL: test_v16f32_mul_sub_x_negone_y_ninf:
+; FMA: # %bb.0:
+; FMA-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm2 * ymm0) + ymm2
+; FMA-NEXT: vfmadd213ps {{.*#+}} ymm1 = (ymm3 * ymm1) + ymm3
+; FMA-NEXT: retq
+;
+; FMA4-LABEL: test_v16f32_mul_sub_x_negone_y_ninf:
+; FMA4: # %bb.0:
+; FMA4-NEXT: vfmaddps {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm2
+; FMA4-NEXT: vfmaddps {{.*#+}} ymm1 = (ymm1 * ymm3) + ymm3
+; FMA4-NEXT: retq
+;
+; AVX512-LABEL: test_v16f32_mul_sub_x_negone_y_ninf:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vfmadd213ps {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm1
+; AVX512-NEXT: retq
+ %s = fsub contract ninf <16 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0>
+ %m = fmul contract ninf <16 x float> %s, %y
+ ret <16 x float> %m
+}
+
 define <8 x double> @test_v8f64_mul_y_sub_x_negone(<8 x double> %x, <8 x double> %y) {
-; FMA-INFS-LABEL: test_v8f64_mul_y_sub_x_negone:
-; FMA-INFS: # %bb.0:
-; FMA-INFS-NEXT: vbroadcastsd {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
-; FMA-INFS-NEXT: vaddpd %ymm4, %ymm1, %ymm1
-; FMA-INFS-NEXT: vaddpd %ymm4, %ymm0, %ymm0
-; FMA-INFS-NEXT: vmulpd %ymm0, %ymm2, %ymm0
-; FMA-INFS-NEXT: vmulpd %ymm1, %ymm3, %ymm1
-; FMA-INFS-NEXT: retq
-;
-; FMA4-INFS-LABEL: test_v8f64_mul_y_sub_x_negone:
-; FMA4-INFS: # %bb.0:
-; FMA4-INFS-NEXT: vbroadcastsd {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
-; FMA4-INFS-NEXT: vaddpd %ymm4, %ymm1, %ymm1
-; FMA4-INFS-NEXT: vaddpd %ymm4, %ymm0, %ymm0
-; FMA4-INFS-NEXT: vmulpd %ymm0, %ymm2, %ymm0
-; FMA4-INFS-NEXT: vmulpd %ymm1, %ymm3, %ymm1
-; FMA4-INFS-NEXT: retq
-;
-; AVX512-INFS-LABEL: test_v8f64_mul_y_sub_x_negone:
-; AVX512-INFS: # %bb.0:
-; AVX512-INFS-NEXT: vaddpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
-; AVX512-INFS-NEXT: vmulpd %zmm0, %zmm1, %zmm0
-; AVX512-INFS-NEXT: retq
-;
-; FMA-NOINFS-LABEL: test_v8f64_mul_y_sub_x_negone:
-; FMA-NOINFS: # %bb.0:
-; FMA-NOINFS-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm2 * ymm0) + ymm2
-; FMA-NOINFS-NEXT: vfmadd213pd {{.*#+}} ymm1 = (ymm3 * ymm1) + ymm3
-; FMA-NOINFS-NEXT: retq
-;
-; FMA4-NOINFS-LABEL: test_v8f64_mul_y_sub_x_negone:
-; FMA4-NOINFS: # %bb.0:
-; FMA4-NOINFS-NEXT: vfmaddpd {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm2
-; FMA4-NOINFS-NEXT: vfmaddpd {{.*#+}} ymm1 = (ymm1 * ymm3) + ymm3
-; FMA4-NOINFS-NEXT: retq
-;
-; AVX512-NOINFS-LABEL: test_v8f64_mul_y_sub_x_negone:
-; AVX512-NOINFS: # %bb.0:
-; AVX512-NOINFS-NEXT: vfmadd213pd {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm1
-; AVX512-NOINFS-NEXT: retq
+; FMA-LABEL: test_v8f64_mul_y_sub_x_negone:
+; FMA: # %bb.0:
+; FMA-NEXT: vbroadcastsd {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; FMA-NEXT: vaddpd %ymm4, %ymm1, %ymm1
+; FMA-NEXT: vaddpd %ymm4, %ymm0, %ymm0
+; FMA-NEXT: vmulpd %ymm0, %ymm2, %ymm0
+; FMA-NEXT: vmulpd %ymm1, %ymm3, %ymm1
+; FMA-NEXT: retq
+;
+; FMA4-LABEL: test_v8f64_mul_y_sub_x_negone:
+; FMA4: # %bb.0:
+; FMA4-NEXT: vbroadcastsd {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; FMA4-NEXT: vaddpd %ymm4, %ymm1, %ymm1
+; FMA4-NEXT: vaddpd %ymm4, %ymm0, %ymm0
+; FMA4-NEXT: vmulpd %ymm0, %ymm2, %ymm0
+; FMA4-NEXT: vmulpd %ymm1, %ymm3, %ymm1
+; FMA4-NEXT: retq
+;
+; AVX512-LABEL: test_v8f64_mul_y_sub_x_negone:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vaddpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
+; AVX512-NEXT: vmulpd %zmm0, %zmm1, %zmm0
+; AVX512-NEXT: retq
 %s = fsub contract <8 x double> %x, <double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0>
 %m = fmul contract <8 x double> %y, %s
 ret <8 x double> %m
 }
+define <8 x double> @test_v8f64_mul_y_sub_x_negone_ninf(<8 x double> %x, <8 x double> %y) {
+; FMA-LABEL: test_v8f64_mul_y_sub_x_negone_ninf:
+; FMA: # %bb.0:
+; FMA-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm2 * ymm0) + ymm2
+; FMA-NEXT: vfmadd213pd {{.*#+}} ymm1 = (ymm3 * ymm1) + ymm3
+; FMA-NEXT: retq
 ;
-; Interpolation Patterns: add(mul(x,t),mul(sub(1.0,t),y))
+; FMA4-LABEL: test_v8f64_mul_y_sub_x_negone_ninf:
+; FMA4: # %bb.0:
+; FMA4-NEXT: vfmaddpd {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm2
+; FMA4-NEXT: vfmaddpd {{.*#+}} ymm1 = (ymm1 * ymm3) + ymm3
+; FMA4-NEXT: retq
 ;
+; AVX512-LABEL: test_v8f64_mul_y_sub_x_negone_ninf:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vfmadd213pd {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm1
+; AVX512-NEXT: retq
+ %s = fsub contract ninf <8 x double> %x, <double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0>
+ %m = fmul contract ninf <8 x double> %y, %s
+ ret <8 x double> %m
+}
+
 define <16 x float> @test_v16f32_interp(<16 x float> %x, <16 x float> %y, <16 x float> %t) {
-; FMA-INFS-LABEL: test_v16f32_interp:
-; FMA-INFS: # %bb.0:
-; FMA-INFS-NEXT: vbroadcastss {{.*#+}} ymm6 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
-; FMA-INFS-NEXT: vsubps %ymm4, %ymm6, %ymm7
-; FMA-INFS-NEXT: vsubps %ymm5, %ymm6, %ymm6
-; FMA-INFS-NEXT: vmulps %ymm6, %ymm3, %ymm3
-; FMA-INFS-NEXT: vmulps %ymm7, %ymm2, %ymm2
-; FMA-INFS-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm4 * ymm0) + ymm2
-; FMA-INFS-NEXT: vfmadd213ps {{.*#+}} ymm1 = (ymm5 * ymm1) + ymm3
-; FMA-INFS-NEXT: retq
-;
-; FMA4-INFS-LABEL: test_v16f32_interp:
-; FMA4-INFS: # %bb.0:
-; FMA4-INFS-NEXT: vbroadcastss {{.*#+}} ymm6 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
-; FMA4-INFS-NEXT: vsubps %ymm4, %ymm6, %ymm7
-; FMA4-INFS-NEXT: vsubps %ymm5, %ymm6, %ymm6
-; FMA4-INFS-NEXT: vmulps %ymm6, %ymm3, %ymm3
-; FMA4-INFS-NEXT: vmulps %ymm7, %ymm2, %ymm2
-; FMA4-INFS-NEXT: vfmaddps {{.*#+}} ymm0 = (ymm0 * ymm4) + ymm2
-; FMA4-INFS-NEXT: vfmaddps {{.*#+}} ymm1 = (ymm1 * ymm5) + ymm3
-; FMA4-INFS-NEXT: retq
-;
-; AVX512-INFS-LABEL: test_v16f32_interp:
-; AVX512-INFS: # %bb.0:
-; AVX512-INFS-NEXT: vbroadcastss {{.*#+}} zmm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
-; AVX512-INFS-NEXT: vsubps %zmm2, %zmm3, %zmm3
-; AVX512-INFS-NEXT: vmulps %zmm3, %zmm1, %zmm1
-; AVX512-INFS-NEXT: vfmadd213ps {{.*#+}} zmm0 = (zmm2 * zmm0) + zmm1
-; AVX512-INFS-NEXT: retq
-;
-; FMA-NOINFS-LABEL: test_v16f32_interp:
-; FMA-NOINFS: # %bb.0:
-; FMA-NOINFS-NEXT: vfmsub213ps {{.*#+}} ymm3 = (ymm5 * ymm3) - ymm3
-; FMA-NOINFS-NEXT: vfmsub213ps {{.*#+}} ymm2 = (ymm4 * ymm2) - ymm2
-; FMA-NOINFS-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm4 * ymm0) - ymm2
-; FMA-NOINFS-NEXT: vfmsub213ps {{.*#+}} ymm1 = (ymm5 * ymm1) - ymm3
-; FMA-NOINFS-NEXT: retq
-;
-; FMA4-NOINFS-LABEL: test_v16f32_interp:
-; FMA4-NOINFS: # %bb.0:
-; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} ymm3 = (ymm5 * ymm3) - ymm3
-; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} ymm2 = (ymm4 * ymm2) - ymm2
-; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} ymm0 = (ymm0 * ymm4) - ymm2
-; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} ymm1 = (ymm1 * ymm5) - ymm3
-; FMA4-NOINFS-NEXT: retq
-;
-; AVX512-NOINFS-LABEL: test_v16f32_interp:
-; AVX512-NOINFS: # %bb.0:
-; AVX512-NOINFS-NEXT: vfmsub213ps {{.*#+}} zmm1 = (zmm2 * zmm1) - zmm1
-; AVX512-NOINFS-NEXT: vfmsub213ps {{.*#+}} zmm0 = (zmm2 * zmm0) - zmm1
-; AVX512-NOINFS-NEXT: retq
+; FMA-LABEL: test_v16f32_interp:
+; FMA: # %bb.0:
+; FMA-NEXT: vbroadcastss {{.*#+}} ymm6 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; FMA-NEXT: vsubps %ymm4, %ymm6, %ymm7
+; FMA-NEXT: vsubps %ymm5, %ymm6, %ymm6
+; FMA-NEXT: vmulps %ymm6, %ymm3, %ymm3
+; FMA-NEXT: vmulps %ymm7, %ymm2, %ymm2
+; FMA-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm4 * ymm0) + ymm2
+; FMA-NEXT: vfmadd213ps {{.*#+}} ymm1 = (ymm5 * ymm1) + ymm3
+; FMA-NEXT: retq
+;
+; FMA4-LABEL: test_v16f32_interp:
+; FMA4: # %bb.0:
+; FMA4-NEXT: vbroadcastss {{.*#+}} ymm6 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; FMA4-NEXT: vsubps %ymm4, %ymm6, %ymm7
+; FMA4-NEXT: vsubps %ymm5, %ymm6, %ymm6
+; FMA4-NEXT: vmulps %ymm6, %ymm3, %ymm3
+; FMA4-NEXT: vmulps %ymm7, %ymm2, %ymm2
+; FMA4-NEXT: vfmaddps {{.*#+}} ymm0 = (ymm0 * ymm4) + ymm2
+; FMA4-NEXT: vfmaddps {{.*#+}} ymm1 = (ymm1 * ymm5) + ymm3
+; FMA4-NEXT: retq
+;
+; AVX512-LABEL: test_v16f32_interp:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vbroadcastss {{.*#+}} zmm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; AVX512-NEXT: vsubps %zmm2, %zmm3, %zmm3
+; AVX512-NEXT: vmulps %zmm3, %zmm1, %zmm1
+; AVX512-NEXT: vfmadd213ps {{.*#+}} zmm0 = (zmm2 * zmm0) + zmm1
+; AVX512-NEXT: retq
 %t1 = fsub contract nsz <16 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %t
 %tx = fmul contract nsz <16 x float> %x, %t
 %ty = fmul contract nsz <16 x float> %y, %t1
@@ -875,58 +922,65 @@ define <16 x float> @test_v16f32_interp(<16 x float> %x, <16 x float> %y, <16 x
 ret <16 x float> %r
 }
+define <16 x float> @test_v16f32_interp_ninf(<16 x float> %x, <16 x float> %y, <16 x float> %t) {
+; FMA-LABEL: test_v16f32_interp_ninf:
+; FMA: # %bb.0:
+; FMA-NEXT: vfmsub213ps {{.*#+}} ymm3 = (ymm5 * ymm3) - ymm3
+; FMA-NEXT: vfmsub213ps {{.*#+}} ymm2 = (ymm4 * ymm2) - ymm2
+; FMA-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm4 * ymm0) - ymm2
+; FMA-NEXT: vfmsub213ps {{.*#+}} ymm1 = (ymm5 * ymm1) - ymm3
+; FMA-NEXT: retq
+;
+; FMA4-LABEL: test_v16f32_interp_ninf:
+; FMA4: # %bb.0:
+; FMA4-NEXT: vfmsubps {{.*#+}} ymm3 = (ymm5 * ymm3) - ymm3
+; FMA4-NEXT: vfmsubps {{.*#+}} ymm2 = (ymm4 * ymm2) - ymm2
+; FMA4-NEXT: vfmsubps {{.*#+}} ymm0 = (ymm0 * ymm4) - ymm2
+; FMA4-NEXT: vfmsubps {{.*#+}} ymm1 = (ymm1 * ymm5) - ymm3
+; FMA4-NEXT: retq
+;
+; AVX512-LABEL: test_v16f32_interp_ninf:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vfmsub213ps {{.*#+}} zmm1 = (zmm2 * zmm1) - zmm1
+; AVX512-NEXT: vfmsub213ps {{.*#+}} zmm0 = (zmm2 * zmm0) - zmm1
+; AVX512-NEXT: retq
+ %t1 = fsub contract ninf nsz <16 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %t
+ %tx = fmul contract ninf nsz <16 x float> %x, %t
+ %ty = fmul contract ninf nsz <16 x float> %y, %t1
+ %r = fadd contract ninf nsz <16 x float> %tx, %ty
+ ret <16 x float> %r
+}
+
 define <8 x double> @test_v8f64_interp(<8 x double> %x, <8 x double> %y, <8 x double> %t) {
-; FMA-INFS-LABEL: test_v8f64_interp:
-; FMA-INFS: # %bb.0:
-; FMA-INFS-NEXT: vbroadcastsd {{.*#+}} ymm6 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
-; FMA-INFS-NEXT: vsubpd %ymm4, %ymm6, %ymm7
-; FMA-INFS-NEXT: vsubpd %ymm5, %ymm6, %ymm6
-; FMA-INFS-NEXT: vmulpd %ymm6, %ymm3, %ymm3
-; FMA-INFS-NEXT: vmulpd %ymm7, %ymm2, %ymm2
-; FMA-INFS-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm4 * ymm0) + ymm2
-; FMA-INFS-NEXT: vfmadd213pd {{.*#+}} ymm1 = (ymm5 * ymm1) + ymm3
-; FMA-INFS-NEXT: retq
-;
-; FMA4-INFS-LABEL: test_v8f64_interp:
-; FMA4-INFS: # %bb.0:
-; FMA4-INFS-NEXT: vbroadcastsd {{.*#+}} ymm6 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
-; FMA4-INFS-NEXT: vsubpd %ymm4, %ymm6, %ymm7
-; FMA4-INFS-NEXT: vsubpd %ymm5, %ymm6, %ymm6
-; FMA4-INFS-NEXT: vmulpd %ymm6, %ymm3, %ymm3
-; FMA4-INFS-NEXT: vmulpd %ymm7, %ymm2, %ymm2
-; FMA4-INFS-NEXT: vfmaddpd {{.*#+}} ymm0 = (ymm0 * ymm4) + ymm2
-; FMA4-INFS-NEXT: vfmaddpd {{.*#+}} ymm1 = (ymm1 * ymm5) + ymm3
-; FMA4-INFS-NEXT: retq
-;
-; AVX512-INFS-LABEL: test_v8f64_interp:
-; AVX512-INFS: # %bb.0:
-; AVX512-INFS-NEXT: vbroadcastsd {{.*#+}} zmm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
-; AVX512-INFS-NEXT: vsubpd %zmm2, %zmm3, %zmm3
-; AVX512-INFS-NEXT: vmulpd %zmm3, %zmm1, %zmm1
-; AVX512-INFS-NEXT: vfmadd213pd {{.*#+}} zmm0 = (zmm2 * zmm0) + zmm1
-; AVX512-INFS-NEXT: retq
-;
-; FMA-NOINFS-LABEL: test_v8f64_interp:
-; FMA-NOINFS: # %bb.0:
-; FMA-NOINFS-NEXT: vfmsub213pd {{.*#+}} ymm3 = (ymm5 * ymm3) - ymm3
-; FMA-NOINFS-NEXT: vfmsub213pd {{.*#+}} ymm2 = (ymm4 * ymm2) - ymm2
-; FMA-NOINFS-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm4 * ymm0) - ymm2
-; FMA-NOINFS-NEXT: vfmsub213pd {{.*#+}} ymm1 = (ymm5 * ymm1) - ymm3
-; FMA-NOINFS-NEXT: retq
-;
-; FMA4-NOINFS-LABEL: test_v8f64_interp:
-; FMA4-NOINFS: # %bb.0:
-; FMA4-NOINFS-NEXT: vfmsubpd {{.*#+}} ymm3 = (ymm5 * ymm3) - ymm3
-; FMA4-NOINFS-NEXT: vfmsubpd {{.*#+}} ymm2 = (ymm4 * ymm2) - ymm2
-; FMA4-NOINFS-NEXT: vfmsubpd {{.*#+}} ymm0 = (ymm0 * ymm4) - ymm2
-; FMA4-NOINFS-NEXT: vfmsubpd {{.*#+}} ymm1 = (ymm1 * ymm5) - ymm3
-; FMA4-NOINFS-NEXT: retq
-;
-; AVX512-NOINFS-LABEL: test_v8f64_interp:
-; AVX512-NOINFS: # %bb.0:
-; AVX512-NOINFS-NEXT: vfmsub213pd {{.*#+}} zmm1 = (zmm2 * zmm1) - zmm1
-; AVX512-NOINFS-NEXT: vfmsub213pd {{.*#+}} zmm0 = (zmm2 * zmm0) - zmm1
-; AVX512-NOINFS-NEXT: retq
+; FMA-LABEL: test_v8f64_interp:
+; FMA: # %bb.0:
+; FMA-NEXT: vbroadcastsd {{.*#+}} ymm6 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; FMA-NEXT: vsubpd %ymm4, %ymm6, %ymm7
+; FMA-NEXT: vsubpd %ymm5, %ymm6, %ymm6
+; FMA-NEXT: vmulpd %ymm6, %ymm3, %ymm3
+; FMA-NEXT: vmulpd %ymm7, %ymm2, %ymm2
+; FMA-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm4 * ymm0) + ymm2
+; FMA-NEXT: vfmadd213pd {{.*#+}} ymm1 = (ymm5 * ymm1) + ymm3
+; FMA-NEXT: retq
+;
+; FMA4-LABEL: test_v8f64_interp:
+; FMA4: # %bb.0:
+; FMA4-NEXT: vbroadcastsd {{.*#+}} ymm6 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; FMA4-NEXT: vsubpd %ymm4, %ymm6, %ymm7
+; FMA4-NEXT: vsubpd %ymm5, %ymm6, %ymm6
+; FMA4-NEXT: vmulpd %ymm6, %ymm3, %ymm3
+; FMA4-NEXT: vmulpd %ymm7, %ymm2, %ymm2
+; FMA4-NEXT: vfmaddpd {{.*#+}} ymm0 = (ymm0 * ymm4) + ymm2
+; FMA4-NEXT: vfmaddpd {{.*#+}} ymm1 = (ymm1 * ymm5) + ymm3
+; FMA4-NEXT: retq
+;
+; AVX512-LABEL: test_v8f64_interp:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vbroadcastsd {{.*#+}} zmm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; AVX512-NEXT: vsubpd %zmm2, %zmm3, %zmm3
+; AVX512-NEXT: vmulpd %zmm3, %zmm1, %zmm1
+; AVX512-NEXT: vfmadd213pd {{.*#+}} zmm0 = (zmm2 * zmm0) + zmm1
+; AVX512-NEXT: retq
 %t1 = fsub contract nsz <8 x double> <double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0>, %t
 %tx = fmul contract nsz <8 x double> %x, %t
 %ty = fmul contract nsz <8 x double> %y, %t1
@@ -934,6 +988,35 @@ define <8 x double> @test_v8f64_interp(<8 x double> %x, <8 x double> %y, <8 x do
 ret <8 x double> %r
 }
+define <8 x double> @test_v8f64_interp_ninf(<8 x double> %x, <8 x double> %y, <8 x double> %t) {
+; FMA-LABEL: test_v8f64_interp_ninf:
+; FMA: # %bb.0:
+; FMA-NEXT: vfmsub213pd {{.*#+}} ymm3 = (ymm5 * ymm3) - ymm3
+; FMA-NEXT: vfmsub213pd {{.*#+}} ymm2 = (ymm4 * ymm2) - ymm2
+; FMA-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm4 * ymm0) - ymm2
+; FMA-NEXT: vfmsub213pd {{.*#+}} ymm1 = (ymm5 * ymm1) - ymm3
+; FMA-NEXT: retq
+;
+; FMA4-LABEL: test_v8f64_interp_ninf:
+; FMA4: # %bb.0:
+; FMA4-NEXT: vfmsubpd {{.*#+}} ymm3 = (ymm5 * ymm3) - ymm3
+; FMA4-NEXT: vfmsubpd {{.*#+}} ymm2 = (ymm4 * ymm2) - ymm2
+; FMA4-NEXT: vfmsubpd {{.*#+}} ymm0 = (ymm0 * ymm4) - ymm2
+; FMA4-NEXT: vfmsubpd {{.*#+}} ymm1 = (ymm1 * ymm5) - ymm3
+; FMA4-NEXT: retq
+;
+; AVX512-LABEL: test_v8f64_interp_ninf:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vfmsub213pd {{.*#+}} zmm1 = (zmm2 * zmm1) - zmm1
+; AVX512-NEXT: vfmsub213pd {{.*#+}} zmm0 = (zmm2 * zmm0) - zmm1
+; AVX512-NEXT: retq
+ %t1 = fsub contract ninf nsz <8 x double> <double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0>, %t
+ %tx = fmul contract ninf nsz <8 x double> %x, %t
+ %ty = fmul contract ninf nsz <8 x double> %y, %t1
+ %r = fadd contract ninf nsz <8 x double> %tx, %ty
+ ret <8 x double> %r
+}
+
 ;
 ; Pattern: (fneg (fma x, y, z)) -> (fma x, -y, -z)
 ;
@@ -1168,3 +1251,7 @@ define <8 x double> @test_v8f64_fneg_fmul_no_nsz(<8 x double> %x, <8 x double> %
 }
 attributes #0 = { "unsafe-fp-math"="true" }
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; AVX512-INFS: {{.*}}
+; FMA-INFS: {{.*}}
+; FMA4-INFS: {{.*}}
diff --git a/llvm/test/CodeGen/X86/funnel-shift.ll b/llvm/test/CodeGen/X86/funnel-shift.ll
index df97f49..252cb33 100644
--- a/llvm/test/CodeGen/X86/funnel-shift.ll
+++ b/llvm/test/CodeGen/X86/funnel-shift.ll
@@ -574,9 +574,9 @@ define <4 x i32> @fshl_v4i32_undef1_cst(<4 x i32> %a0) nounwind {
 ; X86-SSE2-LABEL: fshl_v4i32_undef1_cst:
 ; X86-SSE2: # %bb.0:
 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
+; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [512,1024,2048,4096]
 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
+; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 # [1024,u,4096,u]
 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
 ; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
 ; X86-SSE2-NEXT: retl
@@ -746,9 +746,9 @@ define <4 x i32> @fshr_v4i32_undef1_cst(<4 x i32> %a0) nounwind {
 ; X86-SSE2-LABEL: fshr_v4i32_undef1_cst:
 ; X86-SSE2: # %bb.0:
 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
+; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [8388608,4194304,2097152,1048576]
 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
+; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 # [4194304,u,1048576,u]
 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
 ; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
 ; X86-SSE2-NEXT: retl
diff --git a/llvm/test/CodeGen/X86/global-variable-partition-with-dap.ll b/llvm/test/CodeGen/X86/global-variable-partition-with-dap.ll
index a0c243b..b2b0a6d 100644
--- a/llvm/test/CodeGen/X86/global-variable-partition-with-dap.ll
+++ b/llvm/test/CodeGen/X86/global-variable-partition-with-dap.ll
@@ -1,18 +1,101 @@
 target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
-;; A minimal test case. llc will crash if global variables already has a section
-;; prefix. Subsequent PRs will expand on this test case to test the hotness
-;; reconciliation implementation.
+;; Requires asserts for -debug-only.
+; REQUIRES: asserts
-; RUN: not llc -mtriple=x86_64-unknown-linux-gnu -relocation-model=pic \
+; RUN: rm -rf %t && split-file %s %t && cd %t
+
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -relocation-model=pic \
+; RUN: -partition-static-data-sections=true \
+; RUN: -debug-only=static-data-profile-info \
+; RUN: -data-sections=true -unique-section-names=false \
+; RUN: input-with-data-access-prof-on.ll -o - 2>&1 | FileCheck %s --check-prefixes=LOG,IR
+
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -relocation-model=pic \
 ; RUN: -partition-static-data-sections=true \
+; RUN: -debug-only=static-data-profile-info \
 ; RUN: -data-sections=true -unique-section-names=false \
-; RUN: %s -o - 2>&1 | FileCheck %s --check-prefix=ERR
+; RUN: input-with-data-access-prof-off.ll -o - 2>&1 | FileCheck %s --check-prefixes=OFF
+
+; LOG: hot_bss has section prefix hot, the max from data access profiles as hot and PGO counters as hot
+; LOG: data_unknown_hotness has section prefix <empty>, the max from data access profiles as <empty> and PGO counters as unlikely
+; LOG: external_relro_array has section prefix unlikely, solely from data access profiles
+
+; IR: .type hot_bss,@object
+; IR-NEXT: .section .bss.hot.,"aw"
+; IR: .type data_unknown_hotness,@object
+; IR-NEXT: .section .data,"aw"
+; IR: .type external_relro_array,@object
+; IR-NEXT: .section .data.rel.ro.unlikely.,"aw"
+
+
+; OFF: .type hot_bss,@object
+; OFF-NEXT: .section .bss.hot.,"aw"
+; OFF: .type data_unknown_hotness,@object
+; OFF-NEXT: .section .data.unlikely.,"aw"
+;; Global variable section prefix metadata is not used when
+;; module flag `EnableDataAccessProf` is 0, and @external_relro_array has
+;; external linkage, so analysis based on PGO counters doesn't apply.
+; OFF: .type external_relro_array,@object # @external_relro_array
+; OFF-NEXT: .section .data.rel.ro,"aw"
+
+;--- input-with-data-access-prof-on.ll
+; Internal vars
+@hot_bss = internal global i32 0, !section_prefix !17
+@data_unknown_hotness = internal global i32 1
+; External vars
+@external_relro_array = constant [2 x ptr] [ptr @hot_bss, ptr @data_unknown_hotness], !section_prefix !18
+
+define void @cold_func() !prof !15 {
+ %9 = load i32, ptr @data_unknown_hotness
+ %11 = call i32 (...) @func_taking_arbitrary_param(i32 %9)
+ ret void
+}
+
+define void @hot_func() !prof !14 {
+ %9 = load i32, ptr @hot_bss
+ %11 = call i32 (...) @func_taking_arbitrary_param(i32 %9)
+ ret void
+}
+
+declare i32 @func_taking_arbitrary_param(...)
-; ERR: Global variable hot_bss already has a section prefix hot
+!llvm.module.flags = !{!0, !1}
+!0 = !{i32 2, !"EnableDataAccessProf", i32 1}
+!1 = !{i32 1, !"ProfileSummary", !2}
+!2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
+!3 = !{!"ProfileFormat", !"InstrProf"}
+!4 = !{!"TotalCount", i64 1460183}
+!5 = !{!"MaxCount", i64 849024}
+!6 = !{!"MaxInternalCount", i64 32769}
+!7 = !{!"MaxFunctionCount", i64 849024}
+!8 = !{!"NumCounts", i64 23627}
+!9 = !{!"NumFunctions", i64 3271}
+!10 = !{!"DetailedSummary", !11}
+!11 = !{!12, !13}
+!12 = !{i32 990000, i64 166, i32 73}
+!13 = !{i32 999999, i64 3, i32 1443}
+!14 = !{!"function_entry_count", i64 100000}
+!15 = !{!"function_entry_count", i64 1}
+!16 = !{!"branch_weights", i32 1, i32 99999}
+!17 = !{!"section_prefix", !"hot"}
+!18 = !{!"section_prefix", !"unlikely"}
+
+;--- input-with-data-access-prof-off.ll
+; Same as file above except that module flag `EnableDataAccessProf` has value 0.
+; Internal vars
 @hot_bss = internal global i32 0, !section_prefix !17
 @data_unknown_hotness = internal global i32 1
+; External vars
+@external_relro_array = constant [2 x ptr] [ptr @hot_bss, ptr @data_unknown_hotness], !section_prefix !18
+
+define void @cold_func() !prof !15 {
+ %9 = load i32, ptr @data_unknown_hotness
+ %11 = call i32 (...) @func_taking_arbitrary_param(i32 %9)
+ ret void
+}
 define void @hot_func() !prof !14 {
 %9 = load i32, ptr @hot_bss
@@ -22,8 +105,9 @@ define void @hot_func() !prof !14 {
 declare i32 @func_taking_arbitrary_param(...)
-!llvm.module.flags = !{!1}
+!llvm.module.flags = !{!0, !1}
+!0 = !{i32 2, !"EnableDataAccessProf", i32 0}
 !1 = !{i32 1, !"ProfileSummary", !2}
 !2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
 !3 = !{!"ProfileFormat", !"InstrProf"}
@@ -41,3 +125,4 @@ declare i32 @func_taking_arbitrary_param(...)
 !15 = !{!"function_entry_count", i64 1}
 !16 = !{!"branch_weights", i32 1, i32 99999}
 !17 = !{!"section_prefix", !"hot"}
+!18 = !{!"section_prefix", !"unlikely"}
diff --git a/llvm/test/CodeGen/X86/global-variable-partition.ll b/llvm/test/CodeGen/X86/global-variable-partition.ll
index ce06d17..604b4fd 100644
--- a/llvm/test/CodeGen/X86/global-variable-partition.ll
+++ b/llvm/test/CodeGen/X86/global-variable-partition.ll
@@ -106,23 +106,31 @@ target triple = "x86_64-unknown-linux-gnu"
 ; UNIQ-NEXT: .section .data.unlikely.,"aw",@progbits,unique,8
 ; AGG-NEXT: .section .data.unlikely.,"aw",@progbits
+;; The `.section` directive is omitted for .data with -unique-section-names=false.
+; See MCSectionELF::shouldOmitSectionDirective for the implementation details.
+
 ; For @data_with_unknown_hotness
 ; SYM: .type .Ldata_with_unknown_hotness,@object # @data_with_unknown_hotness
 ; SYM: .section .data..Ldata_with_unknown_hotness,"aw",@progbits
 ; UNIQ: .section .data,"aw",@progbits,unique,9
-; The `.section` directive is omitted for .data with -unique-section-names=false.
-; See MCSectionELF::shouldOmitSectionDirective for the implementation details.
+
 ; AGG: .data
 ; COMMON: .Ldata_with_unknown_hotness:
-; For @hot_data_custom_bar_section
-; It has an explicit section attribute 'var' and shouldn't have hot or unlikely suffix.
+; For variables that are not eligible for section prefix annotation
 ; COMMON: .type hot_data_custom_bar_section,@object
 ; SYM-NEXT: .section bar,"aw",@progbits
 ; SYM: hot_data_custom_bar_section
 ; UNIQ: .section bar,"aw",@progbits
 ; AGG: .section bar,"aw",@progbits
+
+; SYM: .section .data.llvm.fake_var,"aw"
+; UNIQ: .section .data,"aw"
+; AGG: .data
+
+;; No section for linker declaration
+; COMMON-NOT: qux
+
 @.str = private unnamed_addr constant [5 x i8] c"hot\09\00", align 1
 @.str.1 = private unnamed_addr constant [10 x i8] c"%d\09%d\09%d\0A\00", align 1
 @hot_relro_array = internal constant [2 x ptr] [ptr @bss2, ptr @data3]
@@ -137,6 +145,8 @@ target triple = "x86_64-unknown-linux-gnu"
 @data3 = internal global i32 3
 @data_with_unknown_hotness = private global i32 5
 @hot_data_custom_bar_section = internal global i32 101 #0
+@llvm.fake_var = internal global i32 123
+@qux = external global i64
 define void @cold_func(i32 %0) !prof !15 {
 %2 = load i32, ptr @cold_bss
diff --git a/llvm/test/CodeGen/X86/hoist-and-by-const-from-shl-in-eqcmp-zero.ll b/llvm/test/CodeGen/X86/hoist-and-by-const-from-shl-in-eqcmp-zero.ll
index 1a2aac6..b45d01e 100644
--- a/llvm/test/CodeGen/X86/hoist-and-by-const-from-shl-in-eqcmp-zero.ll
+++ b/llvm/test/CodeGen/X86/hoist-and-by-const-from-shl-in-eqcmp-zero.ll
@@ -499,9 +499,9 @@ define <4 x i1> @vec_4xi32_nonsplat_eq(<4 x i32> %x, <4 x i32> %y) nounwind {
 ; X86-SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
 ; X86-SSE2-NEXT: cvttps2dq %xmm1, %xmm1
 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
-; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
+; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 # [0,1,16776960,2147483648]
 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
-; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm2
+; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm2 # [1,u,2147483648,u]
 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
 ; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
 ; X86-SSE2-NEXT: pand %xmm1, %xmm0
@@ -524,9 +524,9 @@ define <4 x i1> @vec_4xi32_nonsplat_eq(<4 x i32> %x, <4 x i32> %y) nounwind {
 ; X64-SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
 ; X64-SSE2-NEXT: cvttps2dq %xmm1, %xmm1
 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
-; X64-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; X64-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [0,1,16776960,2147483648]
 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
-; X64-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
+; X64-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [1,u,2147483648,u]
 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
 ; X64-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
 ; X64-SSE2-NEXT: pand %xmm1, %xmm0
diff --git a/llvm/test/CodeGen/X86/known-pow2.ll b/llvm/test/CodeGen/X86/known-pow2.ll
index e183bbc..019bca7 100644
--- a/llvm/test/CodeGen/X86/known-pow2.ll
+++ b/llvm/test/CodeGen/X86/known-pow2.ll
@@ -28,16 +28,16 @@ define <4 x i32> @pow2_non_splat_vec_fail0(<4 x i32> %x) {
 ; CHECK-NEXT: pmuludq %xmm0, %xmm1
 ; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
 ; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
-; CHECK-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
+; CHECK-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [1073741824,u,67108864,u]
 ; CHECK-NEXT: pshufd {{.*#+}} xmm3 = xmm2[1,3,2,3]
 ; CHECK-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
 ; CHECK-NEXT: movdqa %xmm1, %xmm3
 ; CHECK-NEXT: psrld $1, %xmm3
 ; CHECK-NEXT: shufps {{.*#+}} xmm3 = xmm3[0,1],xmm1[2,3]
-; CHECK-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
+; CHECK-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 # [9,4,16,64]
 ; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm3[0,2,2,3]
 ; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
-; CHECK-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
+; CHECK-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [4,u,64,u]
 ; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
 ; CHECK-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
 ; CHECK-NEXT: psubd %xmm1, %xmm0
diff --git a/llvm/test/CodeGen/X86/madd.ll b/llvm/test/CodeGen/X86/madd.ll
index bdb7c30..2a2a4a5 100644
--- a/llvm/test/CodeGen/X86/madd.ll
+++ b/llvm/test/CodeGen/X86/madd.ll
@@ -2057,10 +2057,10 @@ define <4 x i32> @pmaddwd_negative2(<8 x i16> %A) {
 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
 ; SSE2-NEXT: psrad $16, %xmm2
 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
-; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [4294934528,0,0,0]
 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
 ; SSE2-NEXT: pmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [1,0,7,0,42,0,32,0]
-; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
+; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [32768,4294934528,0,0]
 ; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm0[0,2]
 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[1,3]
 ; SSE2-NEXT: paddd %xmm2, %xmm1
@@ -2071,15 +2071,15 @@ define <4 x i32> @pmaddwd_negative2(<8 x i16> %A) {
 ; AVX1: # %bb.0:
 ; AVX1-NEXT: vpmovsxwd %xmm0, %xmm1
 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
-; AVX1-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX1-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [1,7,42,32]
+; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [32768,4294934528,0,0]
 ; AVX1-NEXT: vphaddd %xmm0, %xmm1, %xmm0
 ; AVX1-NEXT: retq
 ;
 ; AVX256-LABEL: pmaddwd_negative2:
 ; AVX256: # %bb.0:
 ; AVX256-NEXT: vpmovsxwd %xmm0, %ymm0
-; AVX256-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX256-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [32768,4294934528,0,0,1,7,42,32]
 ; AVX256-NEXT: vextracti128 $1, %ymm0, %xmm1
 ; AVX256-NEXT: vphaddd %xmm1, %xmm0, %xmm0
 ; AVX256-NEXT: vzeroupper
diff --git a/llvm/test/CodeGen/X86/masked_gather_scatter.ll b/llvm/test/CodeGen/X86/masked_gather_scatter.ll
index 4cde581..caec02e 100644
--- a/llvm/test/CodeGen/X86/masked_gather_scatter.ll
+++ b/llvm/test/CodeGen/X86/masked_gather_scatter.ll
@@ -4765,6 +4765,66 @@ define void @scaleidx_scatter_outofrange(<8 x float> %value, ptr %base, <8 x i32
 }
 declare void @llvm.masked.scatter.v8f32.v8p0(<8 x float>, <8 x ptr>, i32 immarg, <8 x i1>)
+define <16 x i32> @pr163023_sext(ptr %a0, <16 x i32> %a1) {
+; X64-LABEL: pr163023_sext:
+; X64: # %bb.0:
+; X64-NEXT: kxnorw %k0, %k0, %k1
+; X64-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; X64-NEXT: vpgatherdd (%rdi,%zmm0), %zmm1 {%k1}
+; X64-NEXT: vmovdqa64 %zmm1, %zmm0
+; X64-NEXT: retq
+;
+; X86-LABEL: pr163023_sext:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kxnorw %k0, %k0, %k1
+; X86-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; X86-NEXT: vpgatherdd (%eax,%zmm0), %zmm1 {%k1}
+; X86-NEXT: vmovdqa64 %zmm1, %zmm0
+; X86-NEXT: retl
+ %addr.p = ptrtoint ptr %a0 to i64
+ %addr.v = insertelement <1 x i64> poison, i64 %addr.p, i64 0
+ %addr.splat = shufflevector <1 x i64> %addr.v, <1 x i64> poison, <16 x i32> zeroinitializer
+ %ofs = sext <16 x i32> %a1 to <16 x i64>
+ %addr = add nuw <16 x i64> %addr.splat, %ofs
+ %ptr = inttoptr <16 x i64> %addr to <16 x ptr>
+ %gather = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> %ptr, i32 4, <16 x i1> splat (i1 true), <16 x i32> poison)
+ ret <16 x i32> %gather
+}
+
+define <16 x i32> @pr163023_zext(ptr %a0, <16 x i32> %a1) {
+; X64-LABEL: pr163023_zext:
+; X64: # %bb.0:
+; X64-NEXT: vpmovzxdq {{.*#+}} zmm1 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
+; X64-NEXT: vextracti64x4 $1, %zmm0, %ymm0
+; X64-NEXT: vpmovzxdq {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
+; X64-NEXT: kxnorw %k0, %k0, %k1
+; X64-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; X64-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; X64-NEXT: kxnorw %k0, %k0, %k2
+; X64-NEXT: vpgatherqd (%rdi,%zmm0), %ymm3 {%k2}
+; X64-NEXT: vpgatherqd (%rdi,%zmm1), %ymm2 {%k1}
+; X64-NEXT: vinserti64x4 $1, %ymm3, %zmm2, %zmm0
+; X64-NEXT: retq
+;
+; X86-LABEL: pr163023_zext:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kxnorw %k0, %k0, %k1
+; X86-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; X86-NEXT: vpgatherdd (%eax,%zmm0), %zmm1 {%k1}
+; X86-NEXT: vmovdqa64 %zmm1, %zmm0
+; X86-NEXT: retl
+ %addr.p = ptrtoint ptr %a0 to i64
+ %addr.v = insertelement <1 x i64> poison, i64 %addr.p, i64 0
+ %addr.splat = shufflevector <1 x i64> %addr.v, <1 x i64> poison, <16 x i32> zeroinitializer
+ %ofs = zext <16 x i32> %a1 to <16 x i64>
+ %addr = add nuw <16 x i64> %addr.splat, %ofs
+ %ptr = inttoptr <16 x i64> %addr to <16 x ptr>
+ %gather = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> %ptr, i32 4, <16 x i1> splat (i1 true), <16 x i32> poison)
+ ret <16 x i32> %gather
+}
+
 ;
 ; PR45906
 ; This used to cause fast-isel to generate bad copy instructions that would
diff --git a/llvm/test/CodeGen/X86/min-legal-vector-width.ll b/llvm/test/CodeGen/X86/min-legal-vector-width.ll
index d752659..04f0a65 100644
--- a/llvm/test/CodeGen/X86/min-legal-vector-width.ll
+++ b/llvm/test/CodeGen/X86/min-legal-vector-width.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=skylake-avx512 -mattr=prefer-256-bit | FileCheck %s --check-prefixes=CHECK,CHECK-SKX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=skylake-avx512 -mattr=prefer-256-bit | FileCheck %s --check-prefixes=CHECK,CHECK-SKX,CHECK-SKX-NOVBMI
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=skylake-avx512 -mattr=prefer-256-bit,avx512vbmi | FileCheck %s --check-prefixes=CHECK,CHECK-SKX,CHECK-SKX-VBMI
 ; Make sure CPUs default to prefer-256-bit. avx512vnni isn't interesting as it just adds an isel peephole for vpmaddwd+vpaddd
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=skylake-avx512 | FileCheck %s --check-prefixes=CHECK,CHECK-AVX512
@@ -883,6 +883,30 @@ define <16 x i16> @test_16f32tosb_512(ptr %ptr, <16 x i16> %passthru) "min-legal
 }
 define dso_local void @mul256(ptr %a, ptr %b, ptr %c) "min-legal-vector-width"="256" {
+; CHECK-SKX-NOVBMI-LABEL: mul256:
+; CHECK-SKX-NOVBMI: # %bb.0:
+; CHECK-SKX-NOVBMI-NEXT: vmovdqa (%rdi), %ymm0
+; CHECK-SKX-NOVBMI-NEXT: vmovdqa 32(%rdi), %ymm1
+; CHECK-SKX-NOVBMI-NEXT: vmovdqa (%rsi), %ymm2
+; CHECK-SKX-NOVBMI-NEXT: vmovdqa 32(%rsi), %ymm3
+; CHECK-SKX-NOVBMI-NEXT: vpbroadcastd {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; CHECK-SKX-NOVBMI-NEXT: vpand %ymm4, %ymm3, %ymm5
+; CHECK-SKX-NOVBMI-NEXT: vpmaddubsw %ymm5, %ymm1, %ymm5
+; CHECK-SKX-NOVBMI-NEXT: vpandn %ymm3, %ymm4, %ymm3
+; CHECK-SKX-NOVBMI-NEXT: vpmaddubsw %ymm3, %ymm1, %ymm1
+; CHECK-SKX-NOVBMI-NEXT: vpsllw $8, %ymm1, %ymm1
+; CHECK-SKX-NOVBMI-NEXT: vpternlogq {{.*#+}} ymm1 = ymm1 | (ymm5 & ymm4)
+; CHECK-SKX-NOVBMI-NEXT: vpand %ymm4, %ymm2, %ymm3
+; CHECK-SKX-NOVBMI-NEXT: vpmaddubsw %ymm3, %ymm0, %ymm3
+; CHECK-SKX-NOVBMI-NEXT: vpandn %ymm2, %ymm4, %ymm2
+; CHECK-SKX-NOVBMI-NEXT: vpmaddubsw %ymm2, %ymm0, %ymm0
+; CHECK-SKX-NOVBMI-NEXT: vpsllw $8, %ymm0, %ymm0
+; CHECK-SKX-NOVBMI-NEXT: vpternlogq {{.*#+}} ymm0 = ymm0 | (ymm3 & ymm4)
+; CHECK-SKX-NOVBMI-NEXT: vmovdqa %ymm0, (%rdx)
+; CHECK-SKX-NOVBMI-NEXT: vmovdqa %ymm1, 32(%rdx)
+; CHECK-SKX-NOVBMI-NEXT: vzeroupper
+; CHECK-SKX-NOVBMI-NEXT: retq
+;
 ; CHECK-SKX-VBMI-LABEL: mul256:
 ; CHECK-SKX-VBMI: # %bb.0:
 ; CHECK-SKX-VBMI-NEXT: vmovdqa (%rdi), %ymm0
@@ -960,6 +984,21 @@ define dso_local void @mul256(ptr %a, ptr %b, ptr %c) "min-legal-vector-width"="
 }
 define dso_local void @mul512(ptr %a, ptr %b, ptr %c) "min-legal-vector-width"="512" {
+; CHECK-SKX-NOVBMI-LABEL: mul512:
+; CHECK-SKX-NOVBMI: # %bb.0:
+; CHECK-SKX-NOVBMI-NEXT: vmovdqa64 (%rdi), %zmm0
+; CHECK-SKX-NOVBMI-NEXT: vmovdqa64 (%rsi), %zmm1
+; CHECK-SKX-NOVBMI-NEXT: vpbroadcastd {{.*#+}} zmm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; CHECK-SKX-NOVBMI-NEXT: vpandq %zmm2, %zmm1, %zmm3
+; CHECK-SKX-NOVBMI-NEXT: vpmaddubsw %zmm3, %zmm0, %zmm3
+; CHECK-SKX-NOVBMI-NEXT: vpandnq %zmm1, %zmm2, %zmm1
+; CHECK-SKX-NOVBMI-NEXT: vpmaddubsw %zmm1, %zmm0, %zmm0
+; CHECK-SKX-NOVBMI-NEXT: vpsllw $8, %zmm0, %zmm0
+; CHECK-SKX-NOVBMI-NEXT: vpternlogq {{.*#+}} zmm0 = zmm0 | (zmm3 & zmm2)
+; CHECK-SKX-NOVBMI-NEXT: vmovdqa64 %zmm0, (%rdx)
+; CHECK-SKX-NOVBMI-NEXT: vzeroupper
+; CHECK-SKX-NOVBMI-NEXT: retq
+;
 ; CHECK-SKX-VBMI-LABEL: mul512:
 ; CHECK-SKX-VBMI: # %bb.0:
 ; CHECK-SKX-VBMI-NEXT: vmovdqa64 (%rdi), %zmm0
@@ -1137,6 +1176,14 @@ define <16 x i16> @trunc_v16i32_v16i16_zeroes(ptr %x) nounwind "min-legal-vector
 }
 define <32 x i8> @trunc_v32i16_v32i8_zeroes(ptr %x) nounwind "min-legal-vector-width"="256" {
+; CHECK-SKX-NOVBMI-LABEL: trunc_v32i16_v32i8_zeroes:
+; CHECK-SKX-NOVBMI: # %bb.0:
+; CHECK-SKX-NOVBMI-NEXT: vpsrlw $8, 32(%rdi), %ymm0
+; CHECK-SKX-NOVBMI-NEXT: vpsrlw $8, (%rdi), %ymm1
+; CHECK-SKX-NOVBMI-NEXT: vpackuswb %ymm0, %ymm1, %ymm0
+; CHECK-SKX-NOVBMI-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
+; CHECK-SKX-NOVBMI-NEXT: retq
+;
 ; CHECK-SKX-VBMI-LABEL: trunc_v32i16_v32i8_zeroes:
 ; CHECK-SKX-VBMI: # %bb.0:
 ; CHECK-SKX-VBMI-NEXT: vmovdqa (%rdi), %ymm1
@@ -1192,6 +1239,14 @@ define <16 x i16> @trunc_v16i32_v16i16_sign(ptr %x) nounwind "min-legal-vector-w
 }
 define <32 x i8> @trunc_v32i16_v32i8_sign(ptr %x) nounwind "min-legal-vector-width"="256" {
+; CHECK-SKX-NOVBMI-LABEL: trunc_v32i16_v32i8_sign:
+; CHECK-SKX-NOVBMI: # %bb.0:
+; CHECK-SKX-NOVBMI-NEXT: vpsrlw $8, 32(%rdi), %ymm0
+; CHECK-SKX-NOVBMI-NEXT: vpsrlw $8, (%rdi), %ymm1
+; CHECK-SKX-NOVBMI-NEXT: vpackuswb %ymm0, %ymm1, %ymm0
+; CHECK-SKX-NOVBMI-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
+; CHECK-SKX-NOVBMI-NEXT: retq
+;
 ; CHECK-SKX-VBMI-LABEL: trunc_v32i16_v32i8_sign:
 ; CHECK-SKX-VBMI: # %bb.0:
 ; CHECK-SKX-VBMI-NEXT: vmovdqa (%rdi), %ymm1
diff --git a/llvm/test/CodeGen/X86/omit-urem-of-power-of-two-or-zero-when-comparing-with-zero.ll b/llvm/test/CodeGen/X86/omit-urem-of-power-of-two-or-zero-when-comparing-with-zero.ll
index 693d199..9729fd7 100644
--- a/llvm/test/CodeGen/X86/omit-urem-of-power-of-two-or-zero-when-comparing-with-zero.ll
+++ b/llvm/test/CodeGen/X86/omit-urem-of-power-of-two-or-zero-when-comparing-with-zero.ll
@@ -100,7 +100,7 @@ define <4 x i1> @p4_vector_urem_by_const__splat(<4 x i32> %x, <4 x i32> %y) {
 ; SSE4-LABEL: p4_vector_urem_by_const__splat:
 ; SSE4: # %bb.0:
 ; SSE4-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; SSE4-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE4-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [2863311531,2863311531,2863311531,2863311531]
 ; SSE4-NEXT: psrld $1, %xmm0
 ; SSE4-NEXT: movdqa {{.*#+}} xmm1 = [715827883,715827883,715827883,715827883]
 ; SSE4-NEXT: pcmpgtd %xmm0, %xmm1
@@ -128,10 +128,10 @@ define <4 x i1> @p5_vector_urem_by_const__nonsplat(<4 x i32> %x, <4 x i32> %y) {
 ; SSE2: # %bb.0:
 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [3435973837,u,954437177,u]
 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
-; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [2863311531,3435973837,2863311531,954437177]
+; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [1,u,2147483648,u]
 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,2,2,3]
 ; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
 ; SSE2-NEXT: psrlq $32, %xmm0
@@ -145,7 +145,7 @@ define <4 x i1> @p5_vector_urem_by_const__nonsplat(<4 x i32> %x, <4 x i32> %y) {
 ; SSE4-LABEL: p5_vector_urem_by_const__nonsplat:
 ; SSE4: # %bb.0:
 ; SSE4-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; SSE4-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE4-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [2863311531,3435973837,2863311531,954437177]
 ; SSE4-NEXT: pmovzxdq {{.*#+}} xmm1 = [1,2147483648]
 ; SSE4-NEXT: pmuludq %xmm0, %xmm1
 ; SSE4-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5],xmm0[6,7]
@@ -159,7 +159,7 @@ define <4 x i1> @p5_vector_urem_by_const__nonsplat(<4 x i32> %x, <4 x i32> %y) {
 ; AVX2-LABEL: p5_vector_urem_by_const__nonsplat:
 ; AVX2: # %bb.0:
 ; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [2863311531,3435973837,2863311531,954437177]
 ; AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
 ; AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
 ; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
@@
-196,7 +196,7 @@ define <4 x i1> @p6_vector_urem_by_const__nonsplat_undef0(<4 x i32> %x, <4 x i32 ; SSE4-LABEL: p6_vector_urem_by_const__nonsplat_undef0: ; SSE4: # %bb.0: ; SSE4-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; SSE4-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE4-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [2863311531,2863311531,2863311531,2863311531] ; SSE4-NEXT: movdqa %xmm0, %xmm1 ; SSE4-NEXT: psrld $1, %xmm1 ; SSE4-NEXT: pslld $31, %xmm0 @@ -312,7 +312,7 @@ define <4 x i1> @p8_vector_urem_by_const__nonsplat_undef3(<4 x i32> %x, <4 x i32 ; SSE4-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] ; SSE4-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7] ; SSE4-NEXT: psrld $2, %xmm2 -; SSE4-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 +; SSE4-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [6,6,6,6] ; SSE4-NEXT: psubd %xmm2, %xmm0 ; SSE4-NEXT: pxor %xmm1, %xmm1 ; SSE4-NEXT: pcmpeqd %xmm1, %xmm0 diff --git a/llvm/test/CodeGen/X86/pmul.ll b/llvm/test/CodeGen/X86/pmul.ll index 9aee2f1..00731fe 100644 --- a/llvm/test/CodeGen/X86/pmul.ll +++ b/llvm/test/CodeGen/X86/pmul.ll @@ -91,7 +91,7 @@ define <4 x i32> @mul_v4i32c(<4 x i32> %i) nounwind { ; ; SSE41-LABEL: mul_v4i32c: ; SSE41: # %bb.0: # %entry -; SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [117,117,117,117] ; SSE41-NEXT: retq ; ; AVX-LABEL: mul_v4i32c: diff --git a/llvm/test/CodeGen/X86/pr160612.ll b/llvm/test/CodeGen/X86/pr160612.ll new file mode 100644 index 0000000..6572c42 --- /dev/null +++ b/llvm/test/CodeGen/X86/pr160612.ll @@ -0,0 +1,74 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -O2 | FileCheck %s + +; Test for issue #160612: OR conditions in branches should use multiple branches +; instead of materializing booleans with SETCC when no special optimizations apply. + +declare void @subroutine_foo() +declare void @subroutine_bar() + +; Original issue: (x == 0 || y == 0) was generating SETCC + TEST + BRANCH +; instead of using two conditional branches directly. +define void @func_a(i32 noundef %x, i32 noundef %y) { +; CHECK-LABEL: func_a: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: testl %edi, %edi +; CHECK-NEXT: je subroutine_foo@PLT # TAILCALL +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: testl %esi, %esi +; CHECK-NEXT: jne subroutine_bar@PLT # TAILCALL +; CHECK-NEXT: # %bb.2: # %if.then +; CHECK-NEXT: jmp subroutine_foo@PLT # TAILCALL +entry: + %cmp = icmp eq i32 %x, 0 + %cmp1 = icmp eq i32 %y, 0 + %or.cond = or i1 %cmp, %cmp1 + br i1 %or.cond, label %if.then, label %if.else + +if.then: + tail call void @subroutine_foo() + br label %if.end + +if.else: + tail call void @subroutine_bar() + br label %if.end + +if.end: + ret void +} + +; Reference implementation that already generated optimal code. +; This should continue to generate the same optimal code. 
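+;
+; For illustration, a hedged C-level sketch of the two patterns under test
+; (reconstructed from the IR of func_a above and func_b below, not taken from
+; the original report): func_a folds both tests into a single branch
+; condition, while func_b spells the same logic out as nested ifs.
+;
+;   void func_a(int x, int y) {
+;     if (x == 0 || y == 0) subroutine_foo();
+;     else subroutine_bar();
+;   }
+;
+;   void func_b(int x, int y) {
+;     if (x == 0) subroutine_foo();
+;     else if (y == 0) subroutine_foo();
+;     else subroutine_bar();
+;   }
+;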
+define void @func_b(i32 noundef %x, i32 noundef %y) { +; CHECK-LABEL: func_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: testl %edi, %edi +; CHECK-NEXT: je subroutine_foo@PLT # TAILCALL +; CHECK-NEXT: # %bb.1: # %if.else +; CHECK-NEXT: testl %esi, %esi +; CHECK-NEXT: je subroutine_foo@PLT # TAILCALL +; CHECK-NEXT: # %bb.2: # %if.else3 +; CHECK-NEXT: jmp subroutine_bar@PLT # TAILCALL +entry: + %cmp = icmp eq i32 %x, 0 + br i1 %cmp, label %if.then, label %if.else + +if.then: + tail call void @subroutine_foo() + br label %if.end4 + +if.else: + %cmp1 = icmp eq i32 %y, 0 + br i1 %cmp1, label %if.then2, label %if.else3 + +if.then2: + tail call void @subroutine_foo() + br label %if.end4 + +if.else3: + tail call void @subroutine_bar() + br label %if.end4 + +if.end4: + ret void +} diff --git a/llvm/test/CodeGen/X86/pr162812.ll b/llvm/test/CodeGen/X86/pr162812.ll index 4ea3101..cec093c 100644 --- a/llvm/test/CodeGen/X86/pr162812.ll +++ b/llvm/test/CodeGen/X86/pr162812.ll @@ -34,61 +34,43 @@ define <32 x i8> @PR162812(<32 x i8> %a, <32 x i8> %mask) { ; ; SSE42-LABEL: PR162812: ; SSE42: # %bb.0: -; SSE42-NEXT: movdqa %xmm2, %xmm5 -; SSE42-NEXT: movdqa %xmm0, %xmm2 +; SSE42-NEXT: movdqa %xmm0, %xmm4 +; SSE42-NEXT: psrlw $2, %xmm2 +; SSE42-NEXT: movdqa {{.*#+}} xmm5 = [8224,8224,8224,8224,8224,8224,8224,8224] +; SSE42-NEXT: pand %xmm5, %xmm2 +; SSE42-NEXT: paddb %xmm2, %xmm2 +; SSE42-NEXT: paddb %xmm2, %xmm2 ; SSE42-NEXT: movdqa %xmm0, %xmm6 -; SSE42-NEXT: psllw $2, %xmm6 -; SSE42-NEXT: movdqa {{.*#+}} xmm7 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252] -; SSE42-NEXT: pand %xmm7, %xmm6 -; SSE42-NEXT: psrlw $2, %xmm5 -; SSE42-NEXT: movdqa {{.*#+}} xmm4 = [8224,8224,8224,8224,8224,8224,8224,8224] -; SSE42-NEXT: pand %xmm4, %xmm5 +; SSE42-NEXT: paddb %xmm0, %xmm6 +; SSE42-NEXT: movdqa %xmm2, %xmm0 +; SSE42-NEXT: pblendvb %xmm0, %xmm6, %xmm4 +; SSE42-NEXT: psrlw $2, %xmm3 +; SSE42-NEXT: pand %xmm3, %xmm5 ; SSE42-NEXT: paddb %xmm5, %xmm5 -; SSE42-NEXT: movdqa %xmm5, %xmm0 -; SSE42-NEXT: pblendvb %xmm0, %xmm6, %xmm2 -; SSE42-NEXT: movdqa %xmm2, %xmm6 -; SSE42-NEXT: paddb %xmm2, %xmm6 ; SSE42-NEXT: paddb %xmm5, %xmm5 +; SSE42-NEXT: movdqa %xmm1, %xmm2 +; SSE42-NEXT: paddb %xmm1, %xmm2 ; SSE42-NEXT: movdqa %xmm5, %xmm0 -; SSE42-NEXT: pblendvb %xmm0, %xmm6, %xmm2 -; SSE42-NEXT: movdqa %xmm1, %xmm5 -; SSE42-NEXT: psllw $2, %xmm5 -; SSE42-NEXT: pand %xmm7, %xmm5 -; SSE42-NEXT: psrlw $2, %xmm3 -; SSE42-NEXT: pand %xmm3, %xmm4 -; SSE42-NEXT: paddb %xmm4, %xmm4 -; SSE42-NEXT: movdqa %xmm4, %xmm0 -; SSE42-NEXT: pblendvb %xmm0, %xmm5, %xmm1 -; SSE42-NEXT: movdqa %xmm1, %xmm3 -; SSE42-NEXT: paddb %xmm1, %xmm3 -; SSE42-NEXT: paddb %xmm4, %xmm4 +; SSE42-NEXT: pblendvb %xmm0, %xmm2, %xmm1 ; SSE42-NEXT: movdqa %xmm4, %xmm0 -; SSE42-NEXT: pblendvb %xmm0, %xmm3, %xmm1 -; SSE42-NEXT: movdqa %xmm2, %xmm0 ; SSE42-NEXT: retq ; ; AVX2-LABEL: PR162812: ; AVX2: # %bb.0: -; AVX2-NEXT: vpsllw $2, %ymm0, %ymm2 -; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 +; AVX2-NEXT: vpaddb %ymm0, %ymm0, %ymm2 ; AVX2-NEXT: vpsrlw $2, %ymm1, %ymm1 ; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 ; AVX2-NEXT: vpaddb %ymm1, %ymm1, %ymm1 -; AVX2-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpaddb %ymm0, %ymm0, %ymm2 ; AVX2-NEXT: vpaddb %ymm1, %ymm1, %ymm1 ; AVX2-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: retq ; ; AVX512-LABEL: PR162812: ; AVX512: # %bb.0: -; AVX512-NEXT: vpsllw $2, %ymm0, %ymm2 -; AVX512-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm2, %ymm2 +; 
AVX512-NEXT: vpaddb %ymm0, %ymm0, %ymm2 ; AVX512-NEXT: vpsrlw $2, %ymm1, %ymm1 ; AVX512-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm1, %ymm1 ; AVX512-NEXT: vpaddb %ymm1, %ymm1, %ymm1 -; AVX512-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0 -; AVX512-NEXT: vpaddb %ymm0, %ymm0, %ymm2 ; AVX512-NEXT: vpaddb %ymm1, %ymm1, %ymm1 ; AVX512-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0 ; AVX512-NEXT: retq diff --git a/llvm/test/CodeGen/X86/pr49087.ll b/llvm/test/CodeGen/X86/pr49087.ll deleted file mode 100644 index 1a29222..0000000 --- a/llvm/test/CodeGen/X86/pr49087.ll +++ /dev/null @@ -1,30 +0,0 @@ -; RUN: llc -mtriple=x86_64-unknown-linux-gnu -o - -global-isel < %s 2>&1 | FileCheck %s -; REQUIRES: asserts -; XFAIL: * - -define i32 @test_01(ptr %p, i64 %len, i32 %x) { -; CHECK-LABEL: test_01 - -entry: - %scevgep = getelementptr i32, ptr %p, i64 -1 - br label %loop - -loop: ; preds = %backedge, %entry - %iv = phi i64 [ %iv.next, %backedge ], [ %len, %entry ] - %iv.next = add i64 %iv, -1 - %cond_1 = icmp eq i64 %iv, 0 - br i1 %cond_1, label %exit, label %backedge - -backedge: ; preds = %loop - %scevgep1 = getelementptr i32, ptr %scevgep, i64 %iv - %loaded = load atomic i32, ptr %scevgep1 unordered, align 4 - %cond_2 = icmp eq i32 %loaded, %x - br i1 %cond_2, label %failure, label %loop - -exit: ; preds = %loop - ret i32 -1 - -failure: - unreachable -} - diff --git a/llvm/test/CodeGen/X86/prefer-avx256-wide-mul.ll b/llvm/test/CodeGen/X86/prefer-avx256-wide-mul.ll index 885b075..59b03f8 100644 --- a/llvm/test/CodeGen/X86/prefer-avx256-wide-mul.ll +++ b/llvm/test/CodeGen/X86/prefer-avx256-wide-mul.ll @@ -9,7 +9,7 @@ define <32 x i8> @test_div7_32i8(<32 x i8> %a) { ; AVX256BW: # %bb.0: ; AVX256BW-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX256BW-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] -; AVX256BW-NEXT: vpbroadcastw {{.*#+}} ymm3 = [37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37] +; AVX256BW-NEXT: vpbroadcastw {{.*#+}} ymm3 = [37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0] ; AVX256BW-NEXT: vpmullw %ymm3, %ymm2, %ymm2 ; AVX256BW-NEXT: vpsrlw $8, %ymm2, %ymm2 ; AVX256BW-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] diff --git a/llvm/test/CodeGen/X86/relptr-rodata.ll b/llvm/test/CodeGen/X86/relptr-rodata.ll index ea22b08..954ea8f 100644 --- a/llvm/test/CodeGen/X86/relptr-rodata.ll +++ b/llvm/test/CodeGen/X86/relptr-rodata.ll @@ -10,16 +10,31 @@ target triple = "x86_64-unknown-linux-gnu" ; CHECK: .long hidden-rodata @rodata = hidden constant i32 trunc (i64 sub (i64 ptrtoint (ptr @hidden to i64), i64 ptrtoint (ptr @rodata to i64)) to i32) +; CHECK: .section .rodata.rodata_ptrtoaddr +; CHECK: rodata_ptrtoaddr: +; CHECK: .long hidden-rodata_ptrtoaddr +@rodata_ptrtoaddr = hidden constant i32 trunc (i64 sub (i64 ptrtoaddr (ptr @hidden to i64), i64 ptrtoaddr (ptr @rodata_ptrtoaddr to i64)) to i32) + ; CHECK: .section .data.rel.ro.relro1 ; CHECK: relro1: ; CHECK: .long default-relro1 @relro1 = hidden constant i32 trunc (i64 sub (i64 ptrtoint (ptr @default 
to i64), i64 ptrtoint (ptr @relro1 to i64)) to i32) +; CHECK: .section .data.rel.ro.relro1_ptrtoaddr +; CHECK: relro1_ptrtoaddr: +; CHECK: .long default-relro1_ptrtoaddr +@relro1_ptrtoaddr = hidden constant i32 trunc (i64 sub (i64 ptrtoaddr (ptr @default to i64), i64 ptrtoaddr (ptr @relro1_ptrtoaddr to i64)) to i32) + ; CHECK: .section .data.rel.ro.relro2 ; CHECK: relro2: ; CHECK: .long hidden-relro2 @relro2 = constant i32 trunc (i64 sub (i64 ptrtoint (ptr @hidden to i64), i64 ptrtoint (ptr @relro2 to i64)) to i32) +; CHECK: .section .data.rel.ro.relro2_ptrtoaddr +; CHECK: relro2_ptrtoaddr: +; CHECK: .long hidden-relro2_ptrtoaddr +@relro2_ptrtoaddr = constant i32 trunc (i64 sub (i64 ptrtoaddr (ptr @hidden to i64), i64 ptrtoaddr (ptr @relro2_ptrtoaddr to i64)) to i32) + ; CHECK: .section .rodata.obj ; CHECK-NEXT: .globl obj ; CHECK: obj: diff --git a/llvm/test/CodeGen/X86/rotate-extract-vector.ll b/llvm/test/CodeGen/X86/rotate-extract-vector.ll index 1ead3f9..7d0ec64 100644 --- a/llvm/test/CodeGen/X86/rotate-extract-vector.ll +++ b/llvm/test/CodeGen/X86/rotate-extract-vector.ll @@ -149,19 +149,12 @@ define <32 x i16> @illegal_no_extract_mul(<32 x i16> %i) nounwind { ; Result would undershift define <4 x i64> @no_extract_shl(<4 x i64> %i) nounwind { -; X86-LABEL: no_extract_shl: -; X86: # %bb.0: -; X86-NEXT: vpsllq $24, %ymm0, %ymm1 -; X86-NEXT: vpsrlq $39, %ymm0, %ymm0 -; X86-NEXT: vpternlogq $236, {{\.?LCPI[0-9]+_[0-9]+}}{1to4}, %ymm1, %ymm0 -; X86-NEXT: retl -; -; X64-LABEL: no_extract_shl: -; X64: # %bb.0: -; X64-NEXT: vpsllq $24, %ymm0, %ymm1 -; X64-NEXT: vpsrlq $39, %ymm0, %ymm0 -; X64-NEXT: vpternlogq $236, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm1, %ymm0 -; X64-NEXT: retq +; CHECK-LABEL: no_extract_shl: +; CHECK: # %bb.0: +; CHECK-NEXT: vpsllq $24, %ymm0, %ymm1 +; CHECK-NEXT: vpsrlq $39, %ymm0, %ymm0 +; CHECK-NEXT: vpternlogq {{.*#+}} ymm0 = (ymm0 & m64bcst) | ymm1 +; CHECK-NEXT: ret{{[l|q]}} %lhs_mul = shl <4 x i64> %i, <i64 11, i64 11, i64 11, i64 11> %rhs_mul = shl <4 x i64> %i, <i64 24, i64 24, i64 24, i64 24> %lhs_shift = lshr <4 x i64> %lhs_mul, <i64 50, i64 50, i64 50, i64 50> @@ -171,19 +164,12 @@ define <4 x i64> @no_extract_shl(<4 x i64> %i) nounwind { ; Result would overshift define <4 x i32> @no_extract_shrl(<4 x i32> %i) nounwind { -; X86-LABEL: no_extract_shrl: -; X86: # %bb.0: -; X86-NEXT: vpsrld $9, %xmm0, %xmm1 -; X86-NEXT: vpslld $25, %xmm0, %xmm0 -; X86-NEXT: vpternlogd $236, {{\.?LCPI[0-9]+_[0-9]+}}{1to4}, %xmm1, %xmm0 -; X86-NEXT: retl -; -; X64-LABEL: no_extract_shrl: -; X64: # %bb.0: -; X64-NEXT: vpsrld $9, %xmm0, %xmm1 -; X64-NEXT: vpslld $25, %xmm0, %xmm0 -; X64-NEXT: vpternlogd $236, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm0 -; X64-NEXT: retq +; CHECK-LABEL: no_extract_shrl: +; CHECK: # %bb.0: +; CHECK-NEXT: vpsrld $9, %xmm0, %xmm1 +; CHECK-NEXT: vpslld $25, %xmm0, %xmm0 +; CHECK-NEXT: vpternlogd {{.*#+}} xmm0 = (xmm0 & m32bcst) | xmm1 +; CHECK-NEXT: ret{{[l|q]}} %lhs_div = lshr <4 x i32> %i, <i32 3, i32 3, i32 3, i32 3> %rhs_div = lshr <4 x i32> %i, <i32 9, i32 9, i32 9, i32 9> %lhs_shift = shl <4 x i32> %lhs_div, <i32 28, i32 28, i32 28, i32 28> diff --git a/llvm/test/CodeGen/X86/sdiv-exact.ll b/llvm/test/CodeGen/X86/sdiv-exact.ll index 4568191..7873ffa 100644 --- a/llvm/test/CodeGen/X86/sdiv-exact.ll +++ b/llvm/test/CodeGen/X86/sdiv-exact.ll @@ -87,7 +87,7 @@ define <4 x i32> @test5(<4 x i32> %x) { ; X86-NEXT: pmuludq %xmm1, %xmm0 ; X86-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] ; X86-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,3,3] -; X86-NEXT: 
pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 +; X86-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 # [2863311531,u,3264175145,u] ; X86-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] ; X86-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; X86-NEXT: retl @@ -95,7 +95,7 @@ define <4 x i32> @test5(<4 x i32> %x) { ; X64-LABEL: test5: ; X64: # %bb.0: ; X64-NEXT: vpsravd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; X64-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [2863311531,2863311531,3264175145,3264175145] ; X64-NEXT: retq %div = sdiv exact <4 x i32> %x, <i32 24, i32 24, i32 25, i32 25> ret <4 x i32> %div @@ -112,7 +112,7 @@ define <4 x i32> @test6(<4 x i32> %x) { ; X86-NEXT: pmuludq %xmm0, %xmm1 ; X86-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] ; X86-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,3,3] -; X86-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 +; X86-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [2863311531,u,3303820997,u] ; X86-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] ; X86-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] ; X86-NEXT: movdqa %xmm1, %xmm0 @@ -121,7 +121,7 @@ define <4 x i32> @test6(<4 x i32> %x) { ; X64-LABEL: test6: ; X64: # %bb.0: ; X64-NEXT: vpsravd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; X64-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [2863311531,2863311531,3303820997,3303820997] ; X64-NEXT: retq %div = sdiv exact <4 x i32> %x, <i32 24, i32 24, i32 26, i32 26> ret <4 x i32> %div @@ -131,16 +131,16 @@ define <4 x i32> @test7(<4 x i32> %x) { ; X86-LABEL: test7: ; X86: # %bb.0: ; X86-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] -; X86-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 +; X86-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [3264175145,3264175145,1749801491,1749801491] ; X86-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; X86-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 +; X86-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 # [3264175145,u,1749801491,u] ; X86-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] ; X86-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; X86-NEXT: retl ; ; X64-LABEL: test7: ; X64: # %bb.0: -; X64-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3264175145,3264175145,1749801491,1749801491] ; X64-NEXT: retq %div = sdiv exact <4 x i32> %x, <i32 25, i32 25, i32 27, i32 27> ret <4 x i32> %div @@ -156,7 +156,7 @@ define <4 x i32> @test8(<4 x i32> %x) { ; X86-NEXT: pmuludq %xmm1, %xmm0 ; X86-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] ; X86-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,3,3] -; X86-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 +; X86-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 # [1,u,2863311531,u] ; X86-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] ; X86-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; X86-NEXT: retl @@ -164,7 +164,7 @@ define <4 x i32> @test8(<4 x i32> %x) { ; X64-LABEL: test8: ; X64: # %bb.0: ; X64-NEXT: vpsravd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; X64-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [1,1,2863311531,2863311531] ; X64-NEXT: retq %div = sdiv exact <4 x i32> %x, <i32 1, i32 1, i32 24, i32 24> ret <4 x i32> %div diff --git a/llvm/test/CodeGen/X86/setcc-wide-types.ll b/llvm/test/CodeGen/X86/setcc-wide-types.ll index 
5aa266d..d018c53 100644 --- a/llvm/test/CodeGen/X86/setcc-wide-types.ll +++ b/llvm/test/CodeGen/X86/setcc-wide-types.ll @@ -1447,3 +1447,175 @@ define i1 @eq_i512_load_arg(ptr%p, i512 %b) { %r = icmp eq i512 %a, %b ret i1 %r } + +; Tests for any/allbits from memory. + +define i1 @anybits_i128_load_arg(ptr %w) { +; ANY-LABEL: anybits_i128_load_arg: +; ANY: # %bb.0: +; ANY-NEXT: movq (%rdi), %rax +; ANY-NEXT: orq 8(%rdi), %rax +; ANY-NEXT: setne %al +; ANY-NEXT: retq + %ld = load i128, ptr %w + %cmp = icmp ne i128 %ld, 0 + ret i1 %cmp +} + +define i1 @allbits_i128_load_arg(ptr %w) { +; SSE2-LABEL: allbits_i128_load_arg: +; SSE2: # %bb.0: +; SSE2-NEXT: pcmpeqd %xmm0, %xmm0 +; SSE2-NEXT: pcmpeqb (%rdi), %xmm0 +; SSE2-NEXT: pmovmskb %xmm0, %eax +; SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF +; SSE2-NEXT: sete %al +; SSE2-NEXT: retq +; +; SSE41-LABEL: allbits_i128_load_arg: +; SSE41: # %bb.0: +; SSE41-NEXT: movdqa (%rdi), %xmm0 +; SSE41-NEXT: pcmpeqd %xmm1, %xmm1 +; SSE41-NEXT: ptest %xmm1, %xmm0 +; SSE41-NEXT: setb %al +; SSE41-NEXT: retq +; +; AVXANY-LABEL: allbits_i128_load_arg: +; AVXANY: # %bb.0: +; AVXANY-NEXT: vmovdqa (%rdi), %xmm0 +; AVXANY-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 +; AVXANY-NEXT: vptest %xmm1, %xmm0 +; AVXANY-NEXT: setb %al +; AVXANY-NEXT: retq + %ld = load i128, ptr %w + %cmp = icmp eq i128 %ld, -1 + ret i1 %cmp +} + +define i1 @anybits_i256_load_arg(ptr %w) { +; SSE-LABEL: anybits_i256_load_arg: +; SSE: # %bb.0: +; SSE-NEXT: movq (%rdi), %rax +; SSE-NEXT: movq 8(%rdi), %rcx +; SSE-NEXT: orq 24(%rdi), %rcx +; SSE-NEXT: orq 16(%rdi), %rax +; SSE-NEXT: orq %rcx, %rax +; SSE-NEXT: setne %al +; SSE-NEXT: retq +; +; AVXANY-LABEL: anybits_i256_load_arg: +; AVXANY: # %bb.0: +; AVXANY-NEXT: vmovdqu (%rdi), %ymm0 +; AVXANY-NEXT: vptest %ymm0, %ymm0 +; AVXANY-NEXT: setne %al +; AVXANY-NEXT: vzeroupper +; AVXANY-NEXT: retq + %ld = load i256, ptr %w + %cmp = icmp ne i256 %ld, 0 + ret i1 %cmp +} + +define i1 @allbits_i256_load_arg(ptr %w) { +; SSE-LABEL: allbits_i256_load_arg: +; SSE: # %bb.0: +; SSE-NEXT: movq (%rdi), %rax +; SSE-NEXT: movq 8(%rdi), %rcx +; SSE-NEXT: andq 24(%rdi), %rcx +; SSE-NEXT: andq 16(%rdi), %rax +; SSE-NEXT: andq %rcx, %rax +; SSE-NEXT: cmpq $-1, %rax +; SSE-NEXT: sete %al +; SSE-NEXT: retq +; +; AVX1-LABEL: allbits_i256_load_arg: +; AVX1: # %bb.0: +; AVX1-NEXT: vmovdqu (%rdi), %ymm0 +; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1 +; AVX1-NEXT: vptest %ymm1, %ymm0 +; AVX1-NEXT: setb %al +; AVX1-NEXT: vzeroupper +; AVX1-NEXT: retq +; +; AVX2-LABEL: allbits_i256_load_arg: +; AVX2: # %bb.0: +; AVX2-NEXT: vmovdqu (%rdi), %ymm0 +; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 +; AVX2-NEXT: vptest %ymm1, %ymm0 +; AVX2-NEXT: setb %al +; AVX2-NEXT: vzeroupper +; AVX2-NEXT: retq +; +; AVX512-LABEL: allbits_i256_load_arg: +; AVX512: # %bb.0: +; AVX512-NEXT: vmovdqu (%rdi), %ymm0 +; AVX512-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 +; AVX512-NEXT: vptest %ymm1, %ymm0 +; AVX512-NEXT: setb %al +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: retq + %ld = load i256, ptr %w + %cmp = icmp eq i256 %ld, -1 + ret i1 %cmp +} + +define i1 @anybits_i512_load_arg(ptr %w) { +; NO512-LABEL: anybits_i512_load_arg: +; NO512: # %bb.0: +; NO512-NEXT: movq 16(%rdi), %rax +; NO512-NEXT: movq (%rdi), %rcx +; NO512-NEXT: movq 8(%rdi), %rdx +; NO512-NEXT: movq 24(%rdi), %rsi +; NO512-NEXT: orq 56(%rdi), %rsi +; NO512-NEXT: orq 40(%rdi), %rdx +; NO512-NEXT: orq %rsi, %rdx +; NO512-NEXT: orq 48(%rdi), %rax +; NO512-NEXT: orq 32(%rdi), %rcx +; NO512-NEXT: orq %rax, %rcx +; 
NO512-NEXT: orq %rdx, %rcx +; NO512-NEXT: setne %al +; NO512-NEXT: retq +; +; AVX512-LABEL: anybits_i512_load_arg: +; AVX512: # %bb.0: +; AVX512-NEXT: vmovdqu64 (%rdi), %zmm0 +; AVX512-NEXT: vptestmd %zmm0, %zmm0, %k0 +; AVX512-NEXT: kortestw %k0, %k0 +; AVX512-NEXT: setne %al +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: retq + %ld = load i512, ptr %w + %cmp = icmp ne i512 %ld, 0 + ret i1 %cmp +} + +define i1 @allbits_i512_load_arg(ptr %w) { +; NO512-LABEL: allbits_i512_load_arg: +; NO512: # %bb.0: +; NO512-NEXT: movq 16(%rdi), %rax +; NO512-NEXT: movq (%rdi), %rcx +; NO512-NEXT: movq 8(%rdi), %rdx +; NO512-NEXT: movq 24(%rdi), %rsi +; NO512-NEXT: andq 56(%rdi), %rsi +; NO512-NEXT: andq 40(%rdi), %rdx +; NO512-NEXT: andq %rsi, %rdx +; NO512-NEXT: andq 48(%rdi), %rax +; NO512-NEXT: andq 32(%rdi), %rcx +; NO512-NEXT: andq %rax, %rcx +; NO512-NEXT: andq %rdx, %rcx +; NO512-NEXT: cmpq $-1, %rcx +; NO512-NEXT: sete %al +; NO512-NEXT: retq +; +; AVX512-LABEL: allbits_i512_load_arg: +; AVX512: # %bb.0: +; AVX512-NEXT: vpternlogd {{.*#+}} zmm0 = -1 +; AVX512-NEXT: vpcmpneqd (%rdi), %zmm0, %k0 +; AVX512-NEXT: kortestw %k0, %k0 +; AVX512-NEXT: sete %al +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: retq + %ld = load i512, ptr %w + %cmp = icmp eq i512 %ld, -1 + ret i1 %cmp +} diff --git a/llvm/test/CodeGen/X86/shrink_vmul.ll b/llvm/test/CodeGen/X86/shrink_vmul.ll index e53eed4..504a392 100644 --- a/llvm/test/CodeGen/X86/shrink_vmul.ll +++ b/llvm/test/CodeGen/X86/shrink_vmul.ll @@ -1760,7 +1760,7 @@ define void @mul_2xi16_varconst1(ptr nocapture readonly %a, i64 %index) { ; X86-AVX-NEXT: movl c, %edx ; X86-AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; X86-AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero -; X86-AVX-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 +; X86-AVX-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 # [0,65535,u,u] ; X86-AVX-NEXT: vmovq %xmm0, (%edx,%eax,4) ; X86-AVX-NEXT: retl ; @@ -1781,7 +1781,7 @@ define void @mul_2xi16_varconst1(ptr nocapture readonly %a, i64 %index) { ; X64-AVX-NEXT: movq c(%rip), %rax ; X64-AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; X64-AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero -; X64-AVX-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [0,65535,u,u] ; X64-AVX-NEXT: vmovq %xmm0, (%rax,%rsi,4) ; X64-AVX-NEXT: retq entry: @@ -1864,7 +1864,7 @@ define void @mul_2xi16_varconst3(ptr nocapture readonly %a, i64 %index) { ; X86-SSE-NEXT: movl c, %edx ; X86-SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; X86-SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7] -; X86-SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 +; X86-SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [65536,65536,65536,65536] ; X86-SSE-NEXT: psllq $32, %xmm0 ; X86-SSE-NEXT: movq %xmm0, (%edx,%eax,4) ; X86-SSE-NEXT: retl @@ -1876,7 +1876,7 @@ define void @mul_2xi16_varconst3(ptr nocapture readonly %a, i64 %index) { ; X86-AVX-NEXT: movl c, %edx ; X86-AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; X86-AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero -; X86-AVX-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 +; X86-AVX-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 # [0,65536,u,u] ; X86-AVX-NEXT: vmovq %xmm0, (%edx,%eax,4) ; X86-AVX-NEXT: retl ; @@ -1885,7 +1885,7 @@ define void @mul_2xi16_varconst3(ptr nocapture readonly %a, i64 %index) { 
; X64-SSE-NEXT: movq c(%rip), %rax ; X64-SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; X64-SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7] -; X64-SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; X64-SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [65536,65536,65536,65536] ; X64-SSE-NEXT: psllq $32, %xmm0 ; X64-SSE-NEXT: movq %xmm0, (%rax,%rsi,4) ; X64-SSE-NEXT: retq @@ -1895,7 +1895,7 @@ define void @mul_2xi16_varconst3(ptr nocapture readonly %a, i64 %index) { ; X64-AVX-NEXT: movq c(%rip), %rax ; X64-AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; X64-AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero -; X64-AVX-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [0,65536,u,u] ; X64-AVX-NEXT: vmovq %xmm0, (%rax,%rsi,4) ; X64-AVX-NEXT: retq entry: @@ -1922,7 +1922,7 @@ define void @mul_2xi16_varconst4(ptr nocapture readonly %a, i64 %index) { ; X86-SSE-NEXT: movl c, %edx ; X86-SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; X86-SSE-NEXT: psrad $16, %xmm0 -; X86-SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 +; X86-SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [32768,32768,32768,32768] ; X86-SSE-NEXT: psllq $32, %xmm0 ; X86-SSE-NEXT: movq %xmm0, (%edx,%eax,4) ; X86-SSE-NEXT: retl @@ -1934,7 +1934,7 @@ define void @mul_2xi16_varconst4(ptr nocapture readonly %a, i64 %index) { ; X86-AVX-NEXT: movl c, %edx ; X86-AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; X86-AVX-NEXT: vpmovsxwd %xmm0, %xmm0 -; X86-AVX-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 +; X86-AVX-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 # [0,32768,u,u] ; X86-AVX-NEXT: vmovq %xmm0, (%edx,%eax,4) ; X86-AVX-NEXT: retl ; @@ -1943,7 +1943,7 @@ define void @mul_2xi16_varconst4(ptr nocapture readonly %a, i64 %index) { ; X64-SSE-NEXT: movq c(%rip), %rax ; X64-SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; X64-SSE-NEXT: psrad $16, %xmm0 -; X64-SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; X64-SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [32768,32768,32768,32768] ; X64-SSE-NEXT: psllq $32, %xmm0 ; X64-SSE-NEXT: movq %xmm0, (%rax,%rsi,4) ; X64-SSE-NEXT: retq @@ -1953,7 +1953,7 @@ define void @mul_2xi16_varconst4(ptr nocapture readonly %a, i64 %index) { ; X64-AVX-NEXT: movq c(%rip), %rax ; X64-AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; X64-AVX-NEXT: vpmovsxwd %xmm0, %xmm0 -; X64-AVX-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [0,32768,u,u] ; X64-AVX-NEXT: vmovq %xmm0, (%rax,%rsi,4) ; X64-AVX-NEXT: retq entry: diff --git a/llvm/test/CodeGen/X86/slow-pmulld.ll b/llvm/test/CodeGen/X86/slow-pmulld.ll index 975ffd0..e8c05f9 100644 --- a/llvm/test/CodeGen/X86/slow-pmulld.ll +++ b/llvm/test/CodeGen/X86/slow-pmulld.ll @@ -336,13 +336,13 @@ define <4 x i32> @test_mul_v4i32_v4i16(<4 x i16> %A) { ; SSE4-32-LABEL: test_mul_v4i32_v4i16: ; SSE4-32: # %bb.0: ; SSE4-32-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero -; SSE4-32-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 +; SSE4-32-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [18778,18778,18778,18778] ; SSE4-32-NEXT: retl ; ; SSE4-64-LABEL: test_mul_v4i32_v4i16: ; SSE4-64: # %bb.0: ; SSE4-64-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero -; SSE4-64-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; 
SSE4-64-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [18778,18778,18778,18778] ; SSE4-64-NEXT: retq ; ; AVX2-SLOW-LABEL: test_mul_v4i32_v4i16: @@ -838,13 +838,13 @@ define <4 x i32> @test_mul_v4i32_v4i16_minsize(<4 x i16> %A) minsize { ; SSE-32-LABEL: test_mul_v4i32_v4i16_minsize: ; SSE-32: # %bb.0: ; SSE-32-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero -; SSE-32-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 +; SSE-32-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [18778,18778,18778,18778] ; SSE-32-NEXT: retl ; ; SSE-64-LABEL: test_mul_v4i32_v4i16_minsize: ; SSE-64: # %bb.0: ; SSE-64-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero -; SSE-64-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE-64-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [18778,18778,18778,18778] ; SSE-64-NEXT: retq ; ; AVX2-LABEL: test_mul_v4i32_v4i16_minsize: diff --git a/llvm/test/CodeGen/X86/sqrt-fastmath-mir.ll b/llvm/test/CodeGen/X86/sqrt-fastmath-mir.ll index 42617c1..18588aa 100644 --- a/llvm/test/CodeGen/X86/sqrt-fastmath-mir.ll +++ b/llvm/test/CodeGen/X86/sqrt-fastmath-mir.ll @@ -24,7 +24,7 @@ define float @sqrt_ieee_ninf(float %f) #0 { ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:fr32 = COPY $xmm0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:fr32 = IMPLICIT_DEF - ; CHECK-NEXT: [[VRSQRTSSr:%[0-9]+]]:fr32 = VRSQRTSSr killed [[DEF]], [[COPY]] + ; CHECK-NEXT: [[VRSQRTSSr:%[0-9]+]]:fr32 = ninf afn VRSQRTSSr killed [[DEF]], [[COPY]] ; CHECK-NEXT: [[VMULSSrr:%[0-9]+]]:fr32 = ninf afn nofpexcept VMULSSrr [[COPY]], [[VRSQRTSSr]], implicit $mxcsr ; CHECK-NEXT: [[VMOVSSrm_alt:%[0-9]+]]:fr32 = VMOVSSrm_alt $rip, 1, $noreg, %const.0, $noreg :: (load (s32) from constant-pool) ; CHECK-NEXT: [[VFMADD213SSr:%[0-9]+]]:fr32 = ninf afn nofpexcept VFMADD213SSr [[VRSQRTSSr]], killed [[VMULSSrr]], [[VMOVSSrm_alt]], implicit $mxcsr @@ -71,7 +71,7 @@ define float @sqrt_daz_ninf(float %f) #1 { ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:fr32 = COPY $xmm0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:fr32 = IMPLICIT_DEF - ; CHECK-NEXT: [[VRSQRTSSr:%[0-9]+]]:fr32 = VRSQRTSSr killed [[DEF]], [[COPY]] + ; CHECK-NEXT: [[VRSQRTSSr:%[0-9]+]]:fr32 = ninf afn VRSQRTSSr killed [[DEF]], [[COPY]] ; CHECK-NEXT: [[VMULSSrr:%[0-9]+]]:fr32 = ninf afn nofpexcept VMULSSrr [[COPY]], [[VRSQRTSSr]], implicit $mxcsr ; CHECK-NEXT: [[VMOVSSrm_alt:%[0-9]+]]:fr32 = VMOVSSrm_alt $rip, 1, $noreg, %const.0, $noreg :: (load (s32) from constant-pool) ; CHECK-NEXT: [[VFMADD213SSr:%[0-9]+]]:fr32 = ninf afn nofpexcept VFMADD213SSr [[VRSQRTSSr]], killed [[VMULSSrr]], [[VMOVSSrm_alt]], implicit $mxcsr diff --git a/llvm/test/CodeGen/X86/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/X86/srem-seteq-illegal-types.ll index cc4bda8..650b562 100644 --- a/llvm/test/CodeGen/X86/srem-seteq-illegal-types.ll +++ b/llvm/test/CodeGen/X86/srem-seteq-illegal-types.ll @@ -1,6 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=i686-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=X86 -; RUN: llc -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=X64 ; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2 < %s | FileCheck %s --check-prefixes=X64,SSE2 ; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse4.1 < %s | FileCheck %s --check-prefixes=X64,SSE41 ; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx < %s | FileCheck %s --check-prefixes=X64,AVX1 diff --git a/llvm/test/CodeGen/X86/srem-seteq-vec-nonsplat.ll 
b/llvm/test/CodeGen/X86/srem-seteq-vec-nonsplat.ll index 2d07788..bb7245c 100644 --- a/llvm/test/CodeGen/X86/srem-seteq-vec-nonsplat.ll +++ b/llvm/test/CodeGen/X86/srem-seteq-vec-nonsplat.ll @@ -10,15 +10,15 @@ define <4 x i32> @test_srem_odd_even(<4 x i32> %X) nounwind { ; CHECK-SSE2-LABEL: test_srem_odd_even: ; CHECK-SSE2: # %bb.0: ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] -; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,3067833783,3264175145,3264175145] ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [3067833783,u,3264175145,u] ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] ; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; CHECK-SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,2,2,3] ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] -; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [2147483648,2147483648,1073741824,1073741824] ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,2,2,3] ; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] ; CHECK-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 @@ -30,10 +30,10 @@ define <4 x i32> @test_srem_odd_even(<4 x i32> %X) nounwind { ; ; CHECK-SSE41-LABEL: test_srem_odd_even: ; CHECK-SSE41: # %bb.0: -; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,3067833783,3264175145,3264175145] ; CHECK-SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] -; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [2147483648,2147483648,1073741824,1073741824] ; CHECK-SSE41-NEXT: pxor %xmm2, %xmm2 ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7] ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2] @@ -47,10 +47,10 @@ define <4 x i32> @test_srem_odd_even(<4 x i32> %X) nounwind { ; ; CHECK-AVX1-LABEL: test_srem_odd_even: ; CHECK-AVX1: # %bb.0: -; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,3067833783,3264175145,3264175145] ; CHECK-AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] -; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [2147483648,2147483648,1073741824,1073741824] ; CHECK-AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7] ; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2] @@ -63,7 +63,7 @@ define <4 x i32> @test_srem_odd_even(<4 x i32> %X) nounwind { ; ; CHECK-AVX2-LABEL: test_srem_odd_even: ; CHECK-AVX2: # %bb.0: -; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,3067833783,3264175145,3264175145] ; CHECK-AVX2-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; 
CHECK-AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 ; CHECK-AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 @@ -75,7 +75,7 @@ define <4 x i32> @test_srem_odd_even(<4 x i32> %X) nounwind { ; ; CHECK-AVX512VL-LABEL: test_srem_odd_even: ; CHECK-AVX512VL: # %bb.0: -; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,3067833783,3264175145,3264175145] ; CHECK-AVX512VL-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; CHECK-AVX512VL-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 @@ -109,7 +109,7 @@ define <4 x i32> @test_srem_odd_allones_eq(<4 x i32> %X) nounwind { ; ; CHECK-SSE41-LABEL: test_srem_odd_allones_eq: ; CHECK-SSE41: # %bb.0: -; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,3435973837,3435973837,3435973837] ; CHECK-SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [858993458,858993458,4294967295,858993458] ; CHECK-SSE41-NEXT: pminud %xmm0, %xmm1 @@ -119,7 +119,7 @@ define <4 x i32> @test_srem_odd_allones_eq(<4 x i32> %X) nounwind { ; ; CHECK-AVX1-LABEL: test_srem_odd_allones_eq: ; CHECK-AVX1: # %bb.0: -; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,3435973837,3435973837,3435973837] ; CHECK-AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; CHECK-AVX1-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 ; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 @@ -168,7 +168,7 @@ define <4 x i32> @test_srem_odd_allones_ne(<4 x i32> %X) nounwind { ; ; CHECK-SSE41-LABEL: test_srem_odd_allones_ne: ; CHECK-SSE41: # %bb.0: -; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,3435973837,3435973837,3435973837] ; CHECK-SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [858993458,858993458,4294967295,858993458] ; CHECK-SSE41-NEXT: pminud %xmm0, %xmm1 @@ -178,7 +178,7 @@ define <4 x i32> @test_srem_odd_allones_ne(<4 x i32> %X) nounwind { ; ; CHECK-AVX1-LABEL: test_srem_odd_allones_ne: ; CHECK-AVX1: # %bb.0: -; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,3435973837,3435973837,3435973837] ; CHECK-AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; CHECK-AVX1-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 ; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 @@ -234,7 +234,7 @@ define <4 x i32> @test_srem_even_allones_eq(<4 x i32> %X) nounwind { ; ; CHECK-SSE41-LABEL: test_srem_even_allones_eq: ; CHECK-SSE41: # %bb.0: -; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3067833783,3067833783,3067833783,3067833783] ; CHECK-SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; CHECK-SSE41-NEXT: movdqa %xmm0, %xmm1 ; CHECK-SSE41-NEXT: psrld $1, %xmm1 @@ -248,7 +248,7 @@ define <4 x i32> @test_srem_even_allones_eq(<4 x i32> %X) nounwind { ; ; CHECK-AVX1-LABEL: test_srem_even_allones_eq: ; CHECK-AVX1: # %bb.0: -; CHECK-AVX1-NEXT: vpmulld 
{{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3067833783,3067833783,3067833783,3067833783] ; CHECK-AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; CHECK-AVX1-NEXT: vpsrld $1, %xmm0, %xmm1 ; CHECK-AVX1-NEXT: vpslld $31, %xmm0, %xmm0 @@ -308,7 +308,7 @@ define <4 x i32> @test_srem_even_allones_ne(<4 x i32> %X) nounwind { ; ; CHECK-SSE41-LABEL: test_srem_even_allones_ne: ; CHECK-SSE41: # %bb.0: -; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3067833783,3067833783,3067833783,3067833783] ; CHECK-SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; CHECK-SSE41-NEXT: movdqa %xmm0, %xmm1 ; CHECK-SSE41-NEXT: psrld $1, %xmm1 @@ -322,7 +322,7 @@ define <4 x i32> @test_srem_even_allones_ne(<4 x i32> %X) nounwind { ; ; CHECK-AVX1-LABEL: test_srem_even_allones_ne: ; CHECK-AVX1: # %bb.0: -; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3067833783,3067833783,3067833783,3067833783] ; CHECK-AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; CHECK-AVX1-NEXT: vpsrld $1, %xmm0, %xmm1 ; CHECK-AVX1-NEXT: vpslld $31, %xmm0, %xmm0 @@ -367,15 +367,15 @@ define <4 x i32> @test_srem_odd_even_allones_eq(<4 x i32> %X) nounwind { ; CHECK-SSE2-LABEL: test_srem_odd_even_allones_eq: ; CHECK-SSE2: # %bb.0: ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] -; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,3067833783,0,3264175145] ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [3067833783,u,3264175145,u] ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] ; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; CHECK-SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,2,2,3] ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] -; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [2147483648,2147483648,1073741824,1073741824] ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,2,2,3] ; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] ; CHECK-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 @@ -387,10 +387,10 @@ define <4 x i32> @test_srem_odd_even_allones_eq(<4 x i32> %X) nounwind { ; ; CHECK-SSE41-LABEL: test_srem_odd_even_allones_eq: ; CHECK-SSE41: # %bb.0: -; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,3067833783,0,3264175145] ; CHECK-SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] -; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [2147483648,2147483648,1073741824,1073741824] ; CHECK-SSE41-NEXT: pxor %xmm2, %xmm2 ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7] ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2] @@ -404,10 +404,10 @@ define <4 x i32> @test_srem_odd_even_allones_eq(<4 x i32> %X) nounwind { ; ; 
CHECK-AVX1-LABEL: test_srem_odd_even_allones_eq: ; CHECK-AVX1: # %bb.0: -; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,3067833783,0,3264175145] ; CHECK-AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] -; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [2147483648,2147483648,1073741824,1073741824] ; CHECK-AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7] ; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2] @@ -420,7 +420,7 @@ define <4 x i32> @test_srem_odd_even_allones_eq(<4 x i32> %X) nounwind { ; ; CHECK-AVX2-LABEL: test_srem_odd_even_allones_eq: ; CHECK-AVX2: # %bb.0: -; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,3067833783,0,3264175145] ; CHECK-AVX2-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; CHECK-AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 ; CHECK-AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 @@ -432,7 +432,7 @@ define <4 x i32> @test_srem_odd_even_allones_eq(<4 x i32> %X) nounwind { ; ; CHECK-AVX512VL-LABEL: test_srem_odd_even_allones_eq: ; CHECK-AVX512VL: # %bb.0: -; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,3067833783,0,3264175145] ; CHECK-AVX512VL-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; CHECK-AVX512VL-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 @@ -448,15 +448,15 @@ define <4 x i32> @test_srem_odd_even_allones_ne(<4 x i32> %X) nounwind { ; CHECK-SSE2-LABEL: test_srem_odd_even_allones_ne: ; CHECK-SSE2: # %bb.0: ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] -; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,3067833783,0,3264175145] ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [3067833783,u,3264175145,u] ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] ; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; CHECK-SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,2,2,3] ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] -; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [2147483648,2147483648,1073741824,1073741824] ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,2,2,3] ; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] ; CHECK-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 @@ -468,10 +468,10 @@ define <4 x i32> @test_srem_odd_even_allones_ne(<4 x i32> %X) nounwind { ; ; CHECK-SSE41-LABEL: test_srem_odd_even_allones_ne: ; CHECK-SSE41: # %bb.0: -; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # 
[3435973837,3067833783,0,3264175145] ; CHECK-SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] -; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [2147483648,2147483648,1073741824,1073741824] ; CHECK-SSE41-NEXT: pxor %xmm2, %xmm2 ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7] ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2] @@ -485,10 +485,10 @@ define <4 x i32> @test_srem_odd_even_allones_ne(<4 x i32> %X) nounwind { ; ; CHECK-AVX1-LABEL: test_srem_odd_even_allones_ne: ; CHECK-AVX1: # %bb.0: -; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,3067833783,0,3264175145] ; CHECK-AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] -; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [2147483648,2147483648,1073741824,1073741824] ; CHECK-AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7] ; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2] @@ -501,7 +501,7 @@ define <4 x i32> @test_srem_odd_even_allones_ne(<4 x i32> %X) nounwind { ; ; CHECK-AVX2-LABEL: test_srem_odd_even_allones_ne: ; CHECK-AVX2: # %bb.0: -; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,3067833783,0,3264175145] ; CHECK-AVX2-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; CHECK-AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 ; CHECK-AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 @@ -514,7 +514,7 @@ define <4 x i32> @test_srem_odd_even_allones_ne(<4 x i32> %X) nounwind { ; ; CHECK-AVX512VL-LABEL: test_srem_odd_even_allones_ne: ; CHECK-AVX512VL: # %bb.0: -; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,3067833783,0,3264175145] ; CHECK-AVX512VL-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; CHECK-AVX512VL-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 @@ -534,14 +534,14 @@ define <4 x i32> @test_srem_odd_poweroftwo(<4 x i32> %X) nounwind { ; CHECK-SSE2-LABEL: test_srem_odd_poweroftwo: ; CHECK-SSE2: # %bb.0: ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] -; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,u,1,u] ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [3435973837,3435973837,3435973837,3435973837] ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] ; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; CHECK-SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,3,2,3] -; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # 
[1,u,268435456,u]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,2,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; CHECK-SSE2-NEXT: psrlq $32, %xmm0
@@ -553,7 +553,7 @@ define <4 x i32> @test_srem_odd_poweroftwo(<4 x i32> %X) nounwind {
;
; CHECK-SSE41-LABEL: test_srem_odd_poweroftwo:
; CHECK-SSE41: # %bb.0:
-; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,3435973837,1,3435973837]
; CHECK-SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE41-NEXT: pmovsxdq {{.*#+}} xmm1 = [1,268435456]
; CHECK-SSE41-NEXT: pmuludq %xmm0, %xmm1
@@ -568,9 +568,9 @@ define <4 x i32> @test_srem_odd_poweroftwo(<4 x i32> %X) nounwind {
;
; CHECK-AVX1-LABEL: test_srem_odd_poweroftwo:
; CHECK-AVX1: # %bb.0:
-; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,3435973837,1,3435973837]
; CHECK-AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
+; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 # [1,u,268435456,u]
; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5],xmm0[6,7]
; CHECK-AVX1-NEXT: vpsrlq $32, %xmm1, %xmm1
; CHECK-AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
@@ -581,7 +581,7 @@ define <4 x i32> @test_srem_odd_poweroftwo(<4 x i32> %X) nounwind {
;
; CHECK-AVX2-LABEL: test_srem_odd_poweroftwo:
; CHECK-AVX2: # %bb.0:
-; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,3435973837,1,3435973837]
; CHECK-AVX2-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
@@ -593,7 +593,7 @@ define <4 x i32> @test_srem_odd_poweroftwo(<4 x i32> %X) nounwind {
;
; CHECK-AVX512VL-LABEL: test_srem_odd_poweroftwo:
; CHECK-AVX512VL: # %bb.0:
-; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,3435973837,1,3435973837]
; CHECK-AVX512VL-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
@@ -611,9 +611,9 @@ define <4 x i32> @test_srem_even_poweroftwo(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_srem_even_poweroftwo:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3067833783,u,1,u]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [3067833783,3067833783,3067833783,3067833783]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
@@ -636,11 +636,11 @@ define <4 x i32> @test_srem_even_poweroftwo(<4 x i32> %X) nounwind {
;
; CHECK-SSE41-LABEL: test_srem_even_poweroftwo:
; CHECK-SSE41: # %bb.0:
-; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3067833783,3067833783,1,3067833783]
; CHECK-SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [2147483648,2147483648,2147483648,2147483648]
+; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [2147483648,2147483648,268435456,2147483648]
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
@@ -654,11 +654,11 @@ define <4 x i32> @test_srem_even_poweroftwo(<4 x i32> %X) nounwind {
;
; CHECK-AVX1-LABEL: test_srem_even_poweroftwo:
; CHECK-AVX1: # %bb.0:
-; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3067833783,3067833783,1,3067833783]
; CHECK-AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [2147483648,2147483648,2147483648,2147483648]
+; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [2147483648,2147483648,268435456,2147483648]
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
@@ -671,7 +671,7 @@ define <4 x i32> @test_srem_even_poweroftwo(<4 x i32> %X) nounwind {
;
; CHECK-AVX2-LABEL: test_srem_even_poweroftwo:
; CHECK-AVX2: # %bb.0:
-; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3067833783,3067833783,1,3067833783]
; CHECK-AVX2-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
@@ -683,7 +683,7 @@ define <4 x i32> @test_srem_even_poweroftwo(<4 x i32> %X) nounwind {
;
; CHECK-AVX512VL-LABEL: test_srem_even_poweroftwo:
; CHECK-AVX512VL: # %bb.0:
-; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3067833783,3067833783,1,3067833783]
; CHECK-AVX512VL-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
@@ -701,9 +701,9 @@ define <4 x i32> @test_srem_odd_even_poweroftwo(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_srem_odd_even_poweroftwo:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,3067833783,1,3264175145]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [3067833783,u,3264175145,u]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
@@ -711,7 +711,7 @@ define <4 x i32> @test_srem_odd_even_poweroftwo(<4 x i32> %X) nounwind {
; CHECK-SSE2-NEXT: pmuludq %xmm0, %xmm1
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,3,2,3]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 # [2147483648,u,1073741824,u]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,3,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
@@ -725,11 +725,11 @@ define <4 x i32> @test_srem_odd_even_poweroftwo(<4 x i32> %X) nounwind {
;
; CHECK-SSE41-LABEL: test_srem_odd_even_poweroftwo:
; CHECK-SSE41: # %bb.0:
-; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,3067833783,1,3264175145]
; CHECK-SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [2147483648,u,1073741824,u]
+; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [1,2147483648,268435456,1073741824]
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
@@ -743,11 +743,11 @@ define <4 x i32> @test_srem_odd_even_poweroftwo(<4 x i32> %X) nounwind {
;
; CHECK-AVX1-LABEL: test_srem_odd_even_poweroftwo:
; CHECK-AVX1: # %bb.0:
-; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,3067833783,1,3264175145]
; CHECK-AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [2147483648,u,1073741824,u]
+; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [1,2147483648,268435456,1073741824]
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
@@ -760,7 +760,7 @@ define <4 x i32> @test_srem_odd_even_poweroftwo(<4 x i32> %X) nounwind {
;
; CHECK-AVX2-LABEL: test_srem_odd_even_poweroftwo:
; CHECK-AVX2: # %bb.0:
-; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,3067833783,1,3264175145]
; CHECK-AVX2-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
@@ -772,7 +772,7 @@ define <4 x i32> @test_srem_odd_even_poweroftwo(<4 x i32> %X) nounwind {
;
; CHECK-AVX512VL-LABEL: test_srem_odd_even_poweroftwo:
; CHECK-AVX512VL: # %bb.0:
-; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,3067833783,1,3264175145]
; CHECK-AVX512VL-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
@@ -806,7 +806,7 @@ define <4 x i32> @test_srem_odd_one(<4 x i32> %X) nounwind {
;
; CHECK-SSE41-LABEL: test_srem_odd_one:
; CHECK-SSE41: # %bb.0:
-; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,3435973837,3435973837,3435973837]
; CHECK-SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [858993458,858993458,4294967295,858993458]
; CHECK-SSE41-NEXT: pminud %xmm0, %xmm1
@@ -816,7 +816,7 @@ define <4 x i32> @test_srem_odd_one(<4 x i32> %X) nounwind {
;
; CHECK-AVX1-LABEL: test_srem_odd_one:
; CHECK-AVX1: # %bb.0:
-; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,3435973837,3435973837,3435973837]
; CHECK-AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
@@ -871,7 +871,7 @@ define <4 x i32> @test_srem_even_one(<4 x i32> %X) nounwind {
;
; CHECK-SSE41-LABEL: test_srem_even_one:
; CHECK-SSE41: # %bb.0:
-; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3067833783,3067833783,3067833783,3067833783]
; CHECK-SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE41-NEXT: movdqa %xmm0, %xmm1
; CHECK-SSE41-NEXT: psrld $1, %xmm1
@@ -885,7 +885,7 @@ define <4 x i32> @test_srem_even_one(<4 x i32> %X) nounwind {
;
; CHECK-AVX1-LABEL: test_srem_even_one:
; CHECK-AVX1: # %bb.0:
-; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3067833783,3067833783,3067833783,3067833783]
; CHECK-AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpsrld $1, %xmm0, %xmm1
; CHECK-AVX1-NEXT: vpslld $31, %xmm0, %xmm0
@@ -929,15 +929,15 @@ define <4 x i32> @test_srem_odd_even_one(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_srem_odd_even_one:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,3067833783,0,3264175145]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [3067833783,u,3264175145,u]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,2,2,3]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [2147483648,2147483648,1073741824,1073741824]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,2,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; CHECK-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
@@ -949,10 +949,10 @@ define <4 x i32> @test_srem_odd_even_one(<4 x i32> %X) nounwind {
;
; CHECK-SSE41-LABEL: test_srem_odd_even_one:
; CHECK-SSE41: # %bb.0:
-; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,3067833783,0,3264175145]
; CHECK-SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [2147483648,2147483648,1073741824,1073741824]
; CHECK-SSE41-NEXT: pxor %xmm2, %xmm2
; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
@@ -966,10 +966,10 @@ define <4 x i32> @test_srem_odd_even_one(<4 x i32> %X) nounwind {
;
; CHECK-AVX1-LABEL: test_srem_odd_even_one:
; CHECK-AVX1: # %bb.0:
-; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,3067833783,0,3264175145]
; CHECK-AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [2147483648,2147483648,1073741824,1073741824]
; CHECK-AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
@@ -982,7 +982,7 @@ define <4 x i32> @test_srem_odd_even_one(<4 x i32> %X) nounwind {
;
; CHECK-AVX2-LABEL: test_srem_odd_even_one:
; CHECK-AVX2: # %bb.0:
-; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,3067833783,0,3264175145]
; CHECK-AVX2-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
@@ -994,7 +994,7 @@ define <4 x i32> @test_srem_odd_even_one(<4 x i32> %X) nounwind {
;
; CHECK-AVX512VL-LABEL: test_srem_odd_even_one:
; CHECK-AVX512VL: # %bb.0:
-; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,3067833783,0,3264175145]
; CHECK-AVX512VL-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
@@ -1018,9 +1018,9 @@ define <4 x i32> @test_srem_odd_INT_MIN(<4 x i32> %X) nounwind {
; CHECK-SSE2-NEXT: pand %xmm0, %xmm2
; CHECK-SSE2-NEXT: pcmpeqd %xmm1, %xmm2
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,u,1,u]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[0,2,2,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [3435973837,3435973837,3435973837,3435973837]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
; CHECK-SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
@@ -1039,7 +1039,7 @@ define <4 x i32> @test_srem_odd_INT_MIN(<4 x i32> %X) nounwind {
; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm2 = [2147483647,2147483647,2147483647,2147483647]
; CHECK-SSE41-NEXT: pand %xmm0, %xmm2
; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm2
-; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,3435973837,1,3435973837]
; CHECK-SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [858993458,858993458,1,858993458]
; CHECK-SSE41-NEXT: pminud %xmm0, %xmm1
@@ -1053,7 +1053,7 @@ define <4 x i32> @test_srem_odd_INT_MIN(<4 x i32> %X) nounwind {
; CHECK-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2
; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm2, %xmm1
-; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,3435973837,1,3435973837]
; CHECK-AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2
; CHECK-AVX1-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0
@@ -1067,7 +1067,7 @@ define <4 x i32> @test_srem_odd_INT_MIN(<4 x i32> %X) nounwind {
; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483647,2147483647,2147483647,2147483647]
; CHECK-AVX2-NEXT: vpand %xmm2, %xmm0, %xmm2
; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm2, %xmm1
-; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,3435973837,1,3435973837]
; CHECK-AVX2-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2
; CHECK-AVX2-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0
@@ -1080,7 +1080,7 @@ define <4 x i32> @test_srem_odd_INT_MIN(<4 x i32> %X) nounwind {
; CHECK-AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm2
; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm2, %xmm1
-; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,3435973837,1,3435973837]
; CHECK-AVX512VL-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2
; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0
@@ -1102,7 +1102,7 @@ define <4 x i32> @test_srem_even_INT_MIN(<4 x i32> %X) nounwind {
; CHECK-SSE2-NEXT: pmuludq %xmm0, %xmm1
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 # [3067833783,3067833783,3067833783,3067833783]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
; CHECK-SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
@@ -1137,8 +1137,8 @@ define <4 x i32> @test_srem_even_INT_MIN(<4 x i32> %X) nounwind {
; CHECK-SSE41-NEXT: pmulld %xmm0, %xmm2
; CHECK-SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
-; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
-; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
+; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 # [2147483648,2147483648,2147483648,2147483648]
+; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [2147483648,2147483648,2,2147483648]
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm2[1,1,3,3]
; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm4 = xmm4[0,1],xmm3[2,3],xmm4[4,5],xmm3[6,7]
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,2,2]
@@ -1156,11 +1156,11 @@ define <4 x i32> @test_srem_even_INT_MIN(<4 x i32> %X) nounwind {
; CHECK-AVX1-LABEL: test_srem_even_INT_MIN:
; CHECK-AVX1: # %bb.0:
; CHECK-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2
+; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2 # [3067833783,3067833783,1,3067833783]
; CHECK-AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
-; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3
-; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
+; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 # [2147483648,2147483648,2147483648,2147483648]
+; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 # [2147483648,2147483648,2,2147483648]
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm2[1,1,3,3]
; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm4 = xmm4[0,1],xmm3[2,3],xmm4[4,5],xmm3[6,7]
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[0,0,2,2]
@@ -1177,7 +1177,7 @@ define <4 x i32> @test_srem_even_INT_MIN(<4 x i32> %X) nounwind {
; CHECK-AVX2-LABEL: test_srem_even_INT_MIN:
; CHECK-AVX2: # %bb.0:
; CHECK-AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2
+; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2 # [3067833783,3067833783,1,3067833783]
; CHECK-AVX2-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
; CHECK-AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3
; CHECK-AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
@@ -1196,7 +1196,7 @@ define <4 x i32> @test_srem_even_INT_MIN(<4 x i32> %X) nounwind {
; CHECK-AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm2
; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm2, %xmm1
-; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3067833783,3067833783,1,3067833783]
; CHECK-AVX512VL-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2
@@ -1219,7 +1219,7 @@ define <4 x i32> @test_srem_odd_even_INT_MIN(<4 x i32> %X) nounwind {
; CHECK-SSE2-NEXT: pmuludq %xmm0, %xmm1
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 # [3067833783,u,3264175145,u]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
; CHECK-SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
@@ -1227,7 +1227,7 @@ define <4 x i32> @test_srem_odd_even_INT_MIN(<4 x i32> %X) nounwind {
; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm3
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm3[1,3,2,3]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [2147483648,u,1073741824,u]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm1[1,3,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
@@ -1253,8 +1253,8 @@ define <4 x i32> @test_srem_odd_even_INT_MIN(<4 x i32> %X) nounwind {
; CHECK-SSE41-NEXT: pmulld %xmm0, %xmm2
; CHECK-SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
-; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
-; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
+; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 # [2147483648,u,1073741824,u]
+; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [1,2147483648,2,1073741824]
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm2[1,1,3,3]
; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm4 = xmm4[0,1],xmm3[2,3],xmm4[4,5],xmm3[6,7]
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,2,2]
@@ -1272,11 +1272,11 @@ define <4 x i32> @test_srem_odd_even_INT_MIN(<4 x i32> %X) nounwind {
; CHECK-AVX1-LABEL: test_srem_odd_even_INT_MIN:
; CHECK-AVX1: # %bb.0:
; CHECK-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2
+; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2 # [3435973837,3067833783,1,3264175145]
; CHECK-AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
-; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3
-; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
+; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 # [2147483648,u,1073741824,u]
+; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 # [1,2147483648,2,1073741824]
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm2[1,1,3,3]
; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm4 = xmm4[0,1],xmm3[2,3],xmm4[4,5],xmm3[6,7]
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[0,0,2,2]
@@ -1293,7 +1293,7 @@ define <4 x i32> @test_srem_odd_even_INT_MIN(<4 x i32> %X) nounwind {
; CHECK-AVX2-LABEL: test_srem_odd_even_INT_MIN:
; CHECK-AVX2: # %bb.0:
; CHECK-AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2
+; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2 # [3435973837,3067833783,1,3264175145]
; CHECK-AVX2-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
; CHECK-AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3
; CHECK-AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
@@ -1312,7 +1312,7 @@ define <4 x i32> @test_srem_odd_even_INT_MIN(<4 x i32> %X) nounwind {
; CHECK-AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm2
; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm2, %xmm1
-; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,3067833783,1,3264175145]
; CHECK-AVX512VL-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2
@@ -1333,14 +1333,14 @@ define <4 x i32> @test_srem_odd_allones_and_poweroftwo(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_srem_odd_allones_and_poweroftwo:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,0,1,3435973837]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [0,u,3435973837,u]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,3,2,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [1,u,268435456,u]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,2,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; CHECK-SSE2-NEXT: psrlq $32, %xmm0
@@ -1352,7 +1352,7 @@ define <4 x i32> @test_srem_odd_allones_and_poweroftwo(<4 x i32> %X) nounwind {
;
; CHECK-SSE41-LABEL: test_srem_odd_allones_and_poweroftwo:
; CHECK-SSE41: # %bb.0:
-; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,0,1,3435973837]
; CHECK-SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE41-NEXT: pmovsxdq {{.*#+}} xmm1 = [1,268435456]
; CHECK-SSE41-NEXT: pmuludq %xmm0, %xmm1
@@ -1367,9 +1367,9 @@ define <4 x i32> @test_srem_odd_allones_and_poweroftwo(<4 x i32> %X) nounwind {
;
; CHECK-AVX1-LABEL: test_srem_odd_allones_and_poweroftwo:
; CHECK-AVX1: # %bb.0:
-; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,0,1,3435973837]
; CHECK-AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
+; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 # [1,u,268435456,u]
; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5],xmm0[6,7]
; CHECK-AVX1-NEXT: vpsrlq $32, %xmm1, %xmm1
; CHECK-AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
@@ -1380,7 +1380,7 @@ define <4 x i32> @test_srem_odd_allones_and_poweroftwo(<4 x i32> %X) nounwind {
;
; CHECK-AVX2-LABEL: test_srem_odd_allones_and_poweroftwo:
; CHECK-AVX2: # %bb.0:
-; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,0,1,3435973837]
; CHECK-AVX2-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
@@ -1392,7 +1392,7 @@ define <4 x i32> @test_srem_odd_allones_and_poweroftwo(<4 x i32> %X) nounwind {
;
; CHECK-AVX512VL-LABEL: test_srem_odd_allones_and_poweroftwo:
; CHECK-AVX512VL: # %bb.0:
-; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,0,1,3435973837]
; CHECK-AVX512VL-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
@@ -1410,9 +1410,9 @@ define <4 x i32> @test_srem_even_allones_and_poweroftwo(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_srem_even_allones_and_poweroftwo:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3067833783,0,1,3067833783]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [0,u,3067833783,u]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
@@ -1420,7 +1420,7 @@ define <4 x i32> @test_srem_even_allones_and_poweroftwo(<4 x i32> %X) nounwind {
; CHECK-SSE2-NEXT: pmuludq %xmm0, %xmm1
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,3,2,3]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 # [1,u,2147483648,u]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,3,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
@@ -1434,11 +1434,11 @@ define <4 x i32> @test_srem_even_allones_and_poweroftwo(<4 x i32> %X) nounwind {
;
; CHECK-SSE41-LABEL: test_srem_even_allones_and_poweroftwo:
; CHECK-SSE41: # %bb.0:
-; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3067833783,0,1,3067833783]
; CHECK-SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [1,u,2147483648,u]
+; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [2147483648,1,268435456,2147483648]
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
@@ -1452,11 +1452,11 @@ define <4 x i32> @test_srem_even_allones_and_poweroftwo(<4 x i32> %X) nounwind {
;
; CHECK-AVX1-LABEL: test_srem_even_allones_and_poweroftwo:
; CHECK-AVX1: # %bb.0:
-; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3067833783,0,1,3067833783]
; CHECK-AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [1,u,2147483648,u]
+; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [2147483648,1,268435456,2147483648]
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
@@ -1469,7 +1469,7 @@ define <4 x i32> @test_srem_even_allones_and_poweroftwo(<4 x i32> %X) nounwind {
;
; CHECK-AVX2-LABEL: test_srem_even_allones_and_poweroftwo:
; CHECK-AVX2: # %bb.0:
-; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3067833783,0,1,3067833783]
; CHECK-AVX2-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
@@ -1481,7 +1481,7 @@ define <4 x i32> @test_srem_even_allones_and_poweroftwo(<4 x i32> %X) nounwind {
;
; CHECK-AVX512VL-LABEL: test_srem_even_allones_and_poweroftwo:
; CHECK-AVX512VL: # %bb.0:
-; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3067833783,0,1,3067833783]
; CHECK-AVX512VL-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
@@ -1499,9 +1499,9 @@ define <4 x i32> @test_srem_odd_even_allones_and_poweroftwo(<4 x i32> %X) nounwi
; CHECK-SSE2-LABEL: test_srem_odd_even_allones_and_poweroftwo:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,0,1,3264175145]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [0,u,3264175145,u]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
@@ -1509,7 +1509,7 @@ define <4 x i32> @test_srem_odd_even_allones_and_poweroftwo(<4 x i32> %X) nounwi
; CHECK-SSE2-NEXT: pmuludq %xmm0, %xmm1
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,3,2,3]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 # [1,u,1073741824,u]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,3,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
@@ -1523,11 +1523,11 @@ define <4 x i32> @test_srem_odd_even_allones_and_poweroftwo(<4 x i32> %X) nounwi
;
; CHECK-SSE41-LABEL: test_srem_odd_even_allones_and_poweroftwo:
; CHECK-SSE41: # %bb.0:
-; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,0,1,3264175145]
; CHECK-SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [1,u,1073741824,u]
+; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [1,1,268435456,1073741824]
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
@@ -1541,11 +1541,11 @@ define <4 x i32> @test_srem_odd_even_allones_and_poweroftwo(<4 x i32> %X) nounwi
;
; CHECK-AVX1-LABEL: test_srem_odd_even_allones_and_poweroftwo:
; CHECK-AVX1: # %bb.0:
-; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,0,1,3264175145]
; CHECK-AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [1,u,1073741824,u]
+; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [1,1,268435456,1073741824]
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
@@ -1558,7 +1558,7 @@ define <4 x i32> @test_srem_odd_even_allones_and_poweroftwo(<4 x i32> %X) nounwi
;
; CHECK-AVX2-LABEL: test_srem_odd_even_allones_and_poweroftwo:
; CHECK-AVX2: # %bb.0:
-; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,0,1,3264175145]
; CHECK-AVX2-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
@@ -1570,7 +1570,7 @@ define <4 x i32> @test_srem_odd_even_allones_and_poweroftwo(<4 x i32> %X) nounwi
;
; CHECK-AVX512VL-LABEL: test_srem_odd_even_allones_and_poweroftwo:
; CHECK-AVX512VL: # %bb.0:
-; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,0,1,3264175145]
; CHECK-AVX512VL-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
@@ -1604,7 +1604,7 @@ define <4 x i32> @test_srem_odd_allones_and_one(<4 x i32> %X) nounwind {
;
; CHECK-SSE41-LABEL: test_srem_odd_allones_and_one:
; CHECK-SSE41: # %bb.0:
-; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,3435973837,3435973837,3435973837]
; CHECK-SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [858993458,4294967295,4294967295,858993458]
; CHECK-SSE41-NEXT: pminud %xmm0, %xmm1
@@ -1614,7 +1614,7 @@ define <4 x i32> @test_srem_odd_allones_and_one(<4 x i32> %X) nounwind {
;
; CHECK-AVX1-LABEL: test_srem_odd_allones_and_one:
; CHECK-AVX1: # %bb.0:
-; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,3435973837,3435973837,3435973837]
; CHECK-AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
@@ -1669,7 +1669,7 @@ define <4 x i32> @test_srem_even_allones_and_one(<4 x i32> %X) nounwind {
;
; CHECK-SSE41-LABEL: test_srem_even_allones_and_one:
; CHECK-SSE41: # %bb.0:
-; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3067833783,3067833783,3067833783,3067833783]
; CHECK-SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE41-NEXT: movdqa %xmm0, %xmm1
; CHECK-SSE41-NEXT: psrld $1, %xmm1
@@ -1683,7 +1683,7 @@ define <4 x i32> @test_srem_even_allones_and_one(<4 x i32> %X) nounwind {
;
; CHECK-AVX1-LABEL: test_srem_even_allones_and_one:
; CHECK-AVX1: # %bb.0:
-; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3067833783,3067833783,3067833783,3067833783]
; CHECK-AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpsrld $1, %xmm0, %xmm1
; CHECK-AVX1-NEXT: vpslld $31, %xmm0, %xmm0
@@ -1727,15 +1727,15 @@ define <4 x i32> @test_srem_odd_even_allones_and_one(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_srem_odd_even_allones_and_one:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,0,0,3264175145]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [0,u,3264175145,u]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,2,2,3]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [1,1,1073741824,1073741824]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,2,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; CHECK-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
@@ -1747,10 +1747,10 @@ define <4 x i32> @test_srem_odd_even_allones_and_one(<4 x i32> %X) nounwind {
;
; CHECK-SSE41-LABEL: test_srem_odd_even_allones_and_one:
; CHECK-SSE41: # %bb.0:
-; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,0,0,3264175145]
; CHECK-SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [1,1,1073741824,1073741824]
; CHECK-SSE41-NEXT: pxor %xmm2, %xmm2
; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
@@ -1764,10 +1764,10 @@ define <4 x i32> @test_srem_odd_even_allones_and_one(<4 x i32> %X) nounwind {
;
; CHECK-AVX1-LABEL: test_srem_odd_even_allones_and_one:
; CHECK-AVX1: # %bb.0:
-; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,0,0,3264175145]
; CHECK-AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [1,1,1073741824,1073741824]
; CHECK-AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
@@ -1780,7 +1780,7 @@ define <4 x i32> @test_srem_odd_even_allones_and_one(<4 x i32> %X) nounwind {
;
; CHECK-AVX2-LABEL: test_srem_odd_even_allones_and_one:
; CHECK-AVX2: # %bb.0:
-; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,0,0,3264175145]
; CHECK-AVX2-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
@@ -1792,7 +1792,7 @@ define <4 x i32> @test_srem_odd_even_allones_and_one(<4 x i32> %X) nounwind {
;
; CHECK-AVX512VL-LABEL: test_srem_odd_even_allones_and_one:
; CHECK-AVX512VL: # %bb.0:
-; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,0,0,3264175145]
; CHECK-AVX512VL-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
@@ -1812,15 +1812,15 @@ define <4 x i32> @test_srem_odd_poweroftwo_and_one(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_srem_odd_poweroftwo_and_one:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,1,0,3435973837]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [1,u,3435973837,u]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,2,2,3]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [268435456,268435456,1,1]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,2,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; CHECK-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
@@ -1832,10 +1832,10 @@ define <4 x i32> @test_srem_odd_poweroftwo_and_one(<4 x i32> %X) nounwind {
;
; CHECK-SSE41-LABEL: test_srem_odd_poweroftwo_and_one:
; CHECK-SSE41: # %bb.0:
-; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,1,0,3435973837]
; CHECK-SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [268435456,268435456,1,1]
; CHECK-SSE41-NEXT: pxor %xmm2, %xmm2
; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
@@ -1849,10 +1849,10 @@ define <4 x i32> @test_srem_odd_poweroftwo_and_one(<4 x i32> %X) nounwind {
;
; CHECK-AVX1-LABEL: test_srem_odd_poweroftwo_and_one:
; CHECK-AVX1: # %bb.0:
-; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,1,0,3435973837]
; CHECK-AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [268435456,268435456,1,1]
; CHECK-AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
@@ -1865,7 +1865,7 @@ define <4 x i32> @test_srem_odd_poweroftwo_and_one(<4 x i32> %X) nounwind {
;
; CHECK-AVX2-LABEL: test_srem_odd_poweroftwo_and_one:
; CHECK-AVX2: # %bb.0:
-; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,1,0,3435973837]
; CHECK-AVX2-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
@@ -1877,7 +1877,7 @@ define <4 x i32> @test_srem_odd_poweroftwo_and_one(<4 x i32> %X) nounwind {
;
; CHECK-AVX512VL-LABEL: test_srem_odd_poweroftwo_and_one:
; CHECK-AVX512VL: # %bb.0:
-; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,1,0,3435973837]
; CHECK-AVX512VL-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
@@ -1895,9 +1895,9 @@ define <4 x i32> @test_srem_even_poweroftwo_and_one(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_srem_even_poweroftwo_and_one:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3067833783,1,0,3067833783]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [1,u,3067833783,u]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
@@ -1905,7 +1905,7 @@ define <4 x i32> @test_srem_even_poweroftwo_and_one(<4 x i32> %X) nounwind {
; CHECK-SSE2-NEXT: pmuludq %xmm0, %xmm1
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,3,2,3]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 # [268435456,u,2147483648,u]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,3,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
@@ -1919,11 +1919,11 @@ define <4 x i32> @test_srem_even_poweroftwo_and_one(<4 x i32> %X) nounwind {
;
; CHECK-SSE41-LABEL: test_srem_even_poweroftwo_and_one:
; CHECK-SSE41: # %bb.0:
-; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3067833783,1,0,3067833783]
; CHECK-SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [268435456,u,2147483648,u]
+; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [2147483648,268435456,1,2147483648]
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
@@ -1937,11 +1937,11 @@ define <4 x i32> @test_srem_even_poweroftwo_and_one(<4 x i32> %X) nounwind {
;
; CHECK-AVX1-LABEL: test_srem_even_poweroftwo_and_one:
; CHECK-AVX1: # %bb.0:
-; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3067833783,1,0,3067833783]
; CHECK-AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [268435456,u,2147483648,u]
+; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [2147483648,268435456,1,2147483648]
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
@@ -1954,7 +1954,7 @@ define <4 x i32> @test_srem_even_poweroftwo_and_one(<4 x i32> %X) nounwind {
;
; CHECK-AVX2-LABEL: test_srem_even_poweroftwo_and_one:
; CHECK-AVX2: # %bb.0:
-; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3067833783,1,0,3067833783]
; CHECK-AVX2-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
@@ -1966,7 +1966,7 @@ define <4 x i32> @test_srem_even_poweroftwo_and_one(<4 x i32> %X) nounwind {
;
; CHECK-AVX512VL-LABEL: test_srem_even_poweroftwo_and_one:
; CHECK-AVX512VL: # %bb.0:
-; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3067833783,1,0,3067833783]
; CHECK-AVX512VL-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
@@ -1984,15 +1984,15 @@ define <4 x i32> @test_srem_odd_even_poweroftwo_and_one(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_srem_odd_even_poweroftwo_and_one:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,1,0,3264175145]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [1,u,3264175145,u]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,2,2,3]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [268435456,268435456,1073741824,1073741824]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,2,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; CHECK-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
@@ -2004,10 +2004,10 @@ define <4 x i32> @test_srem_odd_even_poweroftwo_and_one(<4 x i32> %X) nounwind {
;
; CHECK-SSE41-LABEL: test_srem_odd_even_poweroftwo_and_one:
; CHECK-SSE41: # %bb.0:
-; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,1,0,3264175145]
; CHECK-SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [268435456,268435456,1073741824,1073741824]
; CHECK-SSE41-NEXT: pxor %xmm2, %xmm2
; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
@@ -2021,10 +2021,10 @@ define <4 x i32> @test_srem_odd_even_poweroftwo_and_one(<4 x i32> %X) nounwind {
;
; CHECK-AVX1-LABEL: test_srem_odd_even_poweroftwo_and_one:
; CHECK-AVX1: # %bb.0:
-; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,1,0,3264175145]
; CHECK-AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [268435456,268435456,1073741824,1073741824]
; CHECK-AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
@@ -2037,7 +2037,7 @@ define <4 x i32> @test_srem_odd_even_poweroftwo_and_one(<4 x i32> %X) nounwind {
;
; CHECK-AVX2-LABEL: test_srem_odd_even_poweroftwo_and_one:
; CHECK-AVX2: # %bb.0:
-; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,1,0,3264175145]
; CHECK-AVX2-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
@@ -2049,7 +2049,7 @@ define <4 x i32> @test_srem_odd_even_poweroftwo_and_one(<4 x i32> %X) nounwind {
;
; CHECK-AVX512VL-LABEL: test_srem_odd_even_poweroftwo_and_one:
; CHECK-AVX512VL: # %bb.0:
-; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,1,0,3264175145]
; CHECK-AVX512VL-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
@@ -2067,9 +2067,9 @@ define <4 x i32> @test_srem_odd_even_poweroftwo_and_one(<4 x i32> %X) nounwind {
define <4 x i32> @test_srem_odd_allones_and_poweroftwo_and_one(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_srem_odd_allones_and_poweroftwo_and_one:
; CHECK-SSE2: # %bb.0:
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,u,1,u]
; CHECK-SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [1,u,268435456,u]
; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm1
; CHECK-SSE2-NEXT: psrlq $32, %xmm1
; CHECK-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
@@ -2081,9 +2081,9 @@ define <4 x i32> @test_srem_odd_allones_and_poweroftwo_and_one(<4 x i32> %X) nou
;
; CHECK-SSE41-LABEL: test_srem_odd_allones_and_poweroftwo_and_one:
; CHECK-SSE41: # %bb.0:
-; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,u,1,u]
; CHECK-SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [1,u,268435456,u]
; CHECK-SSE41-NEXT: pxor %xmm1, %xmm1
; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
; CHECK-SSE41-NEXT: psrlq $32, %xmm0
@@ -2096,9 +2096,9 @@ define <4 x i32> @test_srem_odd_allones_and_poweroftwo_and_one(<4 x i32> %X) nou
;
; CHECK-AVX1-LABEL: test_srem_odd_allones_and_poweroftwo_and_one:
; CHECK-AVX1: # %bb.0:
-; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,u,1,u]
; CHECK-AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [1,u,268435456,u]
; CHECK-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
; CHECK-AVX1-NEXT: vpsrlq $32, %xmm0, %xmm0
@@ -2110,7 +2110,7 @@ define <4 x i32> @test_srem_odd_allones_and_poweroftwo_and_one(<4 x i32> %X) nou
;
; CHECK-AVX2-LABEL: test_srem_odd_allones_and_poweroftwo_and_one:
; CHECK-AVX2: # %bb.0:
-; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,0,1,0]
; CHECK-AVX2-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
@@ -2122,7 +2122,7 @@ define <4 x i32> @test_srem_odd_allones_and_poweroftwo_and_one(<4 x i32> %X) nou
;
; CHECK-AVX512VL-LABEL: test_srem_odd_allones_and_poweroftwo_and_one:
; CHECK-AVX512VL: # %bb.0:
-; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,0,1,0]
; CHECK-AVX512VL-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
@@ -2138,9 +2138,9 @@ define <4 x i32> @test_srem_odd_allones_and_poweroftwo_and_one(<4 x i32> %X) nou
define <4 x i32> @test_srem_even_allones_and_poweroftwo_and_one(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_srem_even_allones_and_poweroftwo_and_one:
; CHECK-SSE2: # %bb.0:
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3067833783,u,1,u]
; CHECK-SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [2147483648,u,268435456,u]
; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm1
; CHECK-SSE2-NEXT: psrlq $32, %xmm1
; CHECK-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
@@ -2152,9 +2152,9 @@ define <4 x i32> @test_srem_even_allones_and_poweroftwo_and_one(<4 x i32> %X) no
;
; CHECK-SSE41-LABEL: test_srem_even_allones_and_poweroftwo_and_one:
; CHECK-SSE41: # %bb.0:
-; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3067833783,u,1,u]
; CHECK-SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [2147483648,u,268435456,u]
; CHECK-SSE41-NEXT: pxor %xmm1, %xmm1
; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
; CHECK-SSE41-NEXT: psrlq $32, %xmm0
@@ -2167,9 +2167,9 @@ define <4 x i32> @test_srem_even_allones_and_poweroftwo_and_one(<4 x i32> %X) no
;
; CHECK-AVX1-LABEL: test_srem_even_allones_and_poweroftwo_and_one:
; CHECK-AVX1: # %bb.0:
-; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3067833783,u,1,u]
; CHECK-AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [2147483648,u,268435456,u]
; CHECK-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
; CHECK-AVX1-NEXT: vpsrlq $32, %xmm0, %xmm0
@@ -2181,7 +2181,7 @@ define <4 x i32> @test_srem_even_allones_and_poweroftwo_and_one(<4 x i32> %X) no
;
; CHECK-AVX2-LABEL: test_srem_even_allones_and_poweroftwo_and_one:
; CHECK-AVX2: # %bb.0:
-; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3067833783,0,1,0]
; CHECK-AVX2-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
@@ -2193,7 +2193,7 @@ define <4 x i32> @test_srem_even_allones_and_poweroftwo_and_one(<4 x i32> %X) no
;
; CHECK-AVX512VL-LABEL: test_srem_even_allones_and_poweroftwo_and_one:
; CHECK-AVX512VL: # %bb.0:
-; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3067833783,0,1,0]
; CHECK-AVX512VL-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
@@ -2335,10 +2335,10 @@ define <32 x i1> @pr51133(<32 x i8> %x, <32 x i8> %y) {
; CHECK-AVX1: # %bb.0:
; CHECK-AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; CHECK-AVX1-NEXT: vpunpckhbw {{.*#+}} xmm3 = xmm2[8],xmm0[8],xmm2[9],xmm0[9],xmm2[10],xmm0[10],xmm2[11],xmm0[11],xmm2[12],xmm0[12],xmm2[13],xmm0[13],xmm2[14],xmm0[14],xmm2[15],xmm0[15]
-; CHECK-AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 # [34048,34048,26368,37632,21760,33024,22016,35072]
+; CHECK-AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 # [0,133,0,133,0,103,0,147,0,85,0,129,0,86,0,137]
; CHECK-AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3
; CHECK-AVX1-NEXT: vpunpcklbw {{.*#+}} xmm4 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; CHECK-AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4, %xmm4 # [20224,26368,6912,30976,33024,33024,33024,12032]
+; CHECK-AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4, %xmm4 # [0,79,0,103,0,27,0,121,0,129,0,129,0,129,0,47]
; CHECK-AVX1-NEXT: vpsrlw $8, %xmm4, %xmm4
; CHECK-AVX1-NEXT: vpackuswb %xmm3, %xmm4, %xmm4
; CHECK-AVX1-NEXT: vpmaddubsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm5 # [0,0,0,0,1,0,1,0,1,0,0,0,0,0,0,0]
@@ -2369,10 +2369,10 @@ define <32 x i1> @pr51133(<32 x i8> %x, <32 x i8> %y) {
; CHECK-AVX1-NEXT: vpsubb %xmm4, %xmm0, %xmm4
; CHECK-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; CHECK-AVX1-NEXT: vpunpckhbw {{.*#+}} xmm6 = xmm2[8],xmm0[8],xmm2[9],xmm0[9],xmm2[10],xmm0[10],xmm2[11],xmm0[11],xmm2[12],xmm0[12],xmm2[13],xmm0[13],xmm2[14],xmm0[14],xmm2[15],xmm0[15]
-; CHECK-AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm6, %xmm6 # [2304,0,10496,37632,33024,33024,21760,36096]
+; CHECK-AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm6, %xmm6 # [0,9,0,0,0,41,0,147,0,129,0,129,0,85,0,141]
; CHECK-AVX1-NEXT: vpsrlw $8, %xmm6, %xmm6
; CHECK-AVX1-NEXT: vpunpcklbw {{.*#+}} xmm7 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; CHECK-AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm7, %xmm7 # [22016,24320,37632,11008,12544,32512,16640,37632]
+; CHECK-AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm7, %xmm7 # [0,86,0,95,0,147,0,43,0,49,0,127,0,65,0,147]
; CHECK-AVX1-NEXT: vpsrlw $8, %xmm7, %xmm7
; CHECK-AVX1-NEXT: vpackuswb %xmm6, %xmm7, %xmm6
; CHECK-AVX1-NEXT: vpmaddubsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm7 # [0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0]
@@ -2417,10 +2417,10 @@ define <32 x i1> @pr51133(<32 x i8> %x, <32 x i8> %y) {
; CHECK-AVX2: # %bb.0:
; CHECK-AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
; CHECK-AVX2-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15],ymm2[24],ymm0[24],ymm2[25],ymm0[25],ymm2[26],ymm0[26],ymm2[27],ymm0[27],ymm2[28],ymm0[28],ymm2[29],ymm0[29],ymm2[30],ymm0[30],ymm2[31],ymm0[31]
-; CHECK-AVX2-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 # [34048,34048,26368,37632,21760,33024,22016,35072,2304,0,10496,37632,33024,33024,21760,36096]
+; CHECK-AVX2-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 # [0,133,0,133,0,103,0,147,0,85,0,129,0,86,0,137,0,9,0,0,0,41,0,147,0,129,0,129,0,85,0,141]
; CHECK-AVX2-NEXT: vpsrlw $8, %ymm3, %ymm3
; CHECK-AVX2-NEXT: vpunpcklbw {{.*#+}} ymm4 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[16],ymm0[16],ymm2[17],ymm0[17],ymm2[18],ymm0[18],ymm2[19],ymm0[19],ymm2[20],ymm0[20],ymm2[21],ymm0[21],ymm2[22],ymm0[22],ymm2[23],ymm0[23]
-; CHECK-AVX2-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm4, %ymm4 # [20224,26368,6912,30976,33024,33024,33024,12032,22016,24320,37632,11008,12544,32512,16640,37632]
+; CHECK-AVX2-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm4, %ymm4 # [0,79,0,103,0,27,0,121,0,129,0,129,0,129,0,47,0,86,0,95,0,147,0,43,0,49,0,127,0,65,0,147]
; CHECK-AVX2-NEXT: vpsrlw $8, %ymm4, %ymm4
; CHECK-AVX2-NEXT: vpackuswb %ymm3, %ymm4, %ymm3
; CHECK-AVX2-NEXT: vpmaddubsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm4 # [0,0,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0]
diff --git a/llvm/test/CodeGen/X86/srem-seteq-vec-splat.ll b/llvm/test/CodeGen/X86/srem-seteq-vec-splat.ll
index 3359202..d459d01 100644
--- a/llvm/test/CodeGen/X86/srem-seteq-vec-splat.ll
+++ b/llvm/test/CodeGen/X86/srem-seteq-vec-splat.ll
@@ -24,7 +24,7 @@ define <4 x i32> @test_srem_odd_25(<4 x i32> %X) nounwind {
;
; CHECK-SSE41-LABEL: test_srem_odd_25:
; CHECK-SSE41: # %bb.0:
-; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3264175145,3264175145,3264175145,3264175145]
; CHECK-SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [171798690,171798690,171798690,171798690]
; CHECK-SSE41-NEXT: pminud %xmm0, %xmm1
@@ -34,7 +34,7 @@ define <4 x i32> @test_srem_odd_25(<4 x i32> %X) nounwind {
;
; CHECK-AVX1-LABEL: test_srem_odd_25:
; CHECK-AVX1: # %bb.0:
-; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3264175145,3264175145,3264175145,3264175145]
; CHECK-AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
@@ -90,7 +90,7 @@ define <4 x i32> @test_srem_even_100(<4 x i32> %X) nounwind {
;
; CHECK-SSE41-LABEL: test_srem_even_100:
; CHECK-SSE41: # %bb.0:
-; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3264175145,3264175145,3264175145,3264175145]
; CHECK-SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE41-NEXT: movdqa %xmm0, %xmm1
; CHECK-SSE41-NEXT: psrld $2, %xmm1
@@ -104,7 +104,7 @@ define <4 x i32> @test_srem_even_100(<4 x i32> %X) nounwind {
;
; CHECK-AVX1-LABEL: test_srem_even_100:
; CHECK-AVX1: # %bb.0:
-; CHECK-AVX1-NEXT: vpmulld
{{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3264175145,3264175145,3264175145,3264175145] ; CHECK-AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; CHECK-AVX1-NEXT: vpsrld $2, %xmm0, %xmm1 ; CHECK-AVX1-NEXT: vpslld $30, %xmm0, %xmm0 @@ -165,7 +165,7 @@ define <4 x i32> @test_srem_odd_neg25(<4 x i32> %X) nounwind { ; ; CHECK-SSE41-LABEL: test_srem_odd_neg25: ; CHECK-SSE41: # %bb.0: -; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3264175145,3264175145,3264175145,3264175145] ; CHECK-SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [171798690,171798690,171798690,171798690] ; CHECK-SSE41-NEXT: pminud %xmm0, %xmm1 @@ -175,7 +175,7 @@ define <4 x i32> @test_srem_odd_neg25(<4 x i32> %X) nounwind { ; ; CHECK-AVX1-LABEL: test_srem_odd_neg25: ; CHECK-AVX1: # %bb.0: -; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3264175145,3264175145,3264175145,3264175145] ; CHECK-AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; CHECK-AVX1-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 ; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 @@ -231,7 +231,7 @@ define <4 x i32> @test_srem_even_neg100(<4 x i32> %X) nounwind { ; ; CHECK-SSE41-LABEL: test_srem_even_neg100: ; CHECK-SSE41: # %bb.0: -; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3264175145,3264175145,3264175145,3264175145] ; CHECK-SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; CHECK-SSE41-NEXT: movdqa %xmm0, %xmm1 ; CHECK-SSE41-NEXT: psrld $2, %xmm1 @@ -245,7 +245,7 @@ define <4 x i32> @test_srem_even_neg100(<4 x i32> %X) nounwind { ; ; CHECK-AVX1-LABEL: test_srem_even_neg100: ; CHECK-AVX1: # %bb.0: -; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3264175145,3264175145,3264175145,3264175145] ; CHECK-AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; CHECK-AVX1-NEXT: vpsrld $2, %xmm0, %xmm1 ; CHECK-AVX1-NEXT: vpslld $30, %xmm0, %xmm0 @@ -333,7 +333,7 @@ define <4 x i32> @test_srem_odd_undef1(<4 x i32> %X) nounwind { ; CHECK-SSE41-NEXT: psrld $31, %xmm1 ; CHECK-SSE41-NEXT: psrad $3, %xmm2 ; CHECK-SSE41-NEXT: paddd %xmm1, %xmm2 -; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 +; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [25,25,25,25] ; CHECK-SSE41-NEXT: psubd %xmm2, %xmm0 ; CHECK-SSE41-NEXT: pxor %xmm1, %xmm1 ; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0 @@ -351,7 +351,7 @@ define <4 x i32> @test_srem_odd_undef1(<4 x i32> %X) nounwind { ; CHECK-AVX1-NEXT: vpsrld $31, %xmm1, %xmm2 ; CHECK-AVX1-NEXT: vpsrad $3, %xmm1, %xmm1 ; CHECK-AVX1-NEXT: vpaddd %xmm2, %xmm1, %xmm1 -; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [25,25,25,25] ; CHECK-AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0 ; CHECK-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 @@ -444,7 +444,7 @@ define <4 x i32> @test_srem_even_undef1(<4 x i32> %X) nounwind { ; CHECK-SSE41-NEXT: psrld $31, %xmm1 ; CHECK-SSE41-NEXT: psrad $5, %xmm2 ; CHECK-SSE41-NEXT: paddd %xmm1, %xmm2 -; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 +; CHECK-SSE41-NEXT: 
; CHECK-SSE41-NEXT: psubd %xmm2, %xmm0
; CHECK-SSE41-NEXT: pxor %xmm1, %xmm1
; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0
@@ -462,7 +462,7 @@ define <4 x i32> @test_srem_even_undef1(<4 x i32> %X) nounwind {
; CHECK-AVX1-NEXT: vpsrld $31, %xmm1, %xmm2
; CHECK-AVX1-NEXT: vpsrad $5, %xmm1, %xmm1
; CHECK-AVX1-NEXT: vpaddd %xmm2, %xmm1, %xmm1
-; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [100,100,100,100]
; CHECK-AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
diff --git a/llvm/test/CodeGen/X86/udiv-exact.ll b/llvm/test/CodeGen/X86/udiv-exact.ll
index 271d11e..2b3f26a 100644
--- a/llvm/test/CodeGen/X86/udiv-exact.ll
+++ b/llvm/test/CodeGen/X86/udiv-exact.ll
@@ -87,7 +87,7 @@ define <4 x i32> @test5(<4 x i32> %x) {
; X86-NEXT: pmuludq %xmm1, %xmm0
; X86-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X86-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,3,3]
-; X86-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
+; X86-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 # [2863311531,u,3264175145,u]
; X86-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; X86-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X86-NEXT: retl
@@ -95,7 +95,7 @@ define <4 x i32> @test5(<4 x i32> %x) {
; X64-LABEL: test5:
; X64: # %bb.0:
; X64-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; X64-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [2863311531,2863311531,3264175145,3264175145]
; X64-NEXT: retq
%div = udiv exact <4 x i32> %x, <i32 24, i32 24, i32 25, i32 25>
ret <4 x i32> %div
@@ -112,7 +112,7 @@ define <4 x i32> @test6(<4 x i32> %x) {
; X86-NEXT: pmuludq %xmm0, %xmm1
; X86-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; X86-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,3,3]
-; X86-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
+; X86-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [2863311531,u,3303820997,u]
; X86-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X86-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; X86-NEXT: movdqa %xmm1, %xmm0
@@ -121,7 +121,7 @@ define <4 x i32> @test6(<4 x i32> %x) {
; X64-LABEL: test6:
; X64: # %bb.0:
; X64-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; X64-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [2863311531,2863311531,3303820997,3303820997]
; X64-NEXT: retq
%div = udiv exact <4 x i32> %x, <i32 24, i32 24, i32 26, i32 26>
ret <4 x i32> %div
@@ -131,16 +131,16 @@ define <4 x i32> @test7(<4 x i32> %x) {
; X86-LABEL: test7:
; X86: # %bb.0:
; X86-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; X86-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
+; X86-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [3264175145,3264175145,1749801491,1749801491]
; X86-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; X86-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
+; X86-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 # [3264175145,u,1749801491,u]
; X86-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; X86-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X86-NEXT: retl
;
; X64-LABEL: test7:
; X64: # %bb.0:
-; X64-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3264175145,3264175145,1749801491,1749801491]
; X64-NEXT: retq
%div = udiv exact <4 x i32> %x, <i32 25, i32 25, i32 27, i32 27>
ret <4 x i32> %div
@@ -156,7 +156,7 @@ define <4 x i32> @test8(<4 x i32> %x) {
; X86-NEXT: pmuludq %xmm1, %xmm0
; X86-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X86-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,3,3]
-; X86-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
+; X86-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 # [1,u,2863311531,u]
; X86-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; X86-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X86-NEXT: retl
@@ -164,7 +164,7 @@ define <4 x i32> @test8(<4 x i32> %x) {
; X64-LABEL: test8:
; X64: # %bb.0:
; X64-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; X64-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [1,1,2863311531,2863311531]
; X64-NEXT: retq
%div = udiv exact <4 x i32> %x, <i32 1, i32 1, i32 24, i32 24>
ret <4 x i32> %div
diff --git a/llvm/test/CodeGen/X86/undo-mul-and.ll b/llvm/test/CodeGen/X86/undo-mul-and.ll
index c9c40099..6566153 100644
--- a/llvm/test/CodeGen/X86/undo-mul-and.ll
+++ b/llvm/test/CodeGen/X86/undo-mul-and.ll
@@ -63,9 +63,9 @@ define <4 x i32> @mul_and_to_neg_shl_and_vec_fail_no_splat(<4 x i32> %x) {
; CHECK-SSE-LABEL: mul_and_to_neg_shl_and_vec_fail_no_splat:
; CHECK-SSE: # %bb.0:
; CHECK-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [56,56,56,64]
; CHECK-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; CHECK-SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [56,u,64,u]
; CHECK-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; CHECK-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
@@ -73,13 +73,13 @@ define <4 x i32> @mul_and_to_neg_shl_and_vec_fail_no_splat(<4 x i32> %x) {
;
; CHECK-AVX1-LABEL: mul_and_to_neg_shl_and_vec_fail_no_splat:
; CHECK-AVX1: # %bb.0:
-; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [56,56,56,64]
; CHECK-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX1-NEXT: retq
;
; CHECK-AVX512-LABEL: mul_and_to_neg_shl_and_vec_fail_no_splat:
; CHECK-AVX512: # %bb.0:
-; CHECK-AVX512-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX512-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [56,56,56,64]
; CHECK-AVX512-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; CHECK-AVX512-NEXT: retq
%mul = mul <4 x i32> %x, <i32 56, i32 56, i32 56, i32 64>
@@ -92,9 +92,9 @@ define <4 x i32> @mul_and_to_neg_shl_and_vec_todo_no_splat1(<4 x i32> %x) {
; CHECK-SSE-LABEL: mul_and_to_neg_shl_and_vec_todo_no_splat1:
; CHECK-SSE: # %bb.0:
; CHECK-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [56,56,56,48]
; CHECK-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; CHECK-SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [56,u,48,u]
; CHECK-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; CHECK-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
@@ -102,13 +102,13 @@ define <4 x i32> @mul_and_to_neg_shl_and_vec_todo_no_splat1(<4 x i32> %x) {
;
; CHECK-AVX1-LABEL: mul_and_to_neg_shl_and_vec_todo_no_splat1:
; CHECK-AVX1: # %bb.0:
-; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [56,56,56,48]
; CHECK-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX1-NEXT: retq
;
; CHECK-AVX512-LABEL: mul_and_to_neg_shl_and_vec_todo_no_splat1:
; CHECK-AVX512: # %bb.0:
-; CHECK-AVX512-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX512-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [56,56,56,48]
; CHECK-AVX512-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; CHECK-AVX512-NEXT: retq
%mul = mul <4 x i32> %x, <i32 56, i32 56, i32 56, i32 48>
@@ -131,7 +131,7 @@ define <4 x i32> @mul_and_to_neg_shl_and_vec_todo_no_splat2(<4 x i32> %x) {
;
; CHECK-AVX1-LABEL: mul_and_to_neg_shl_and_vec_todo_no_splat2:
; CHECK-AVX1: # %bb.0:
-; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [56,56,56,56]
; CHECK-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX1-NEXT: retq
;
diff --git a/llvm/test/CodeGen/X86/urem-seteq-illegal-types.ll b/llvm/test/CodeGen/X86/urem-seteq-illegal-types.ll
index 7c1a1e2..759055d 100644
--- a/llvm/test/CodeGen/X86/urem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/X86/urem-seteq-illegal-types.ll
@@ -1,6 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=i686-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=X86
-; RUN: llc -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=X64
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2 < %s | FileCheck %s --check-prefixes=X64,SSE2
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse4.1 < %s | FileCheck %s --check-prefixes=X64,SSE41
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx < %s | FileCheck %s --check-prefixes=X64,AVX1
@@ -168,7 +167,7 @@ define <3 x i1> @test_urem_vec(<3 x i11> %X) nounwind {
; SSE41-NEXT: pinsrd $1, %esi, %xmm0
; SSE41-NEXT: pinsrd $2, %edx, %xmm0
; SSE41-NEXT: psubd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [683,1463,819,u]
; SSE41-NEXT: pmovsxwd {{.*#+}} xmm1 = [2047,2047,2047,2047]
; SSE41-NEXT: movdqa %xmm0, %xmm2
; SSE41-NEXT: pand %xmm1, %xmm2
@@ -194,7 +193,7 @@ define <3 x i1> @test_urem_vec(<3 x i11> %X) nounwind {
; AVX1-NEXT: vpinsrd $1, %esi, %xmm0, %xmm0
; AVX1-NEXT: vpinsrd $2, %edx, %xmm0, %xmm0
; AVX1-NEXT: vpsubd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [683,1463,819,u]
; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [2047,2047,2047,2047]
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT: vpsrld $1, %xmm2, %xmm2
@@ -219,7 +218,7 @@ define <3 x i1> @test_urem_vec(<3 x i11> %X) nounwind {
; AVX2-NEXT: vpinsrd $1, %esi, %xmm0, %xmm0
; AVX2-NEXT: vpinsrd $2, %edx, %xmm0, %xmm0
; AVX2-NEXT: vpsubd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [683,1463,819,u]
; AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2047,2047,2047,2047]
; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0
@@ -241,7 +240,7 @@ define <3 x i1> @test_urem_vec(<3 x i11> %X) nounwind {
; AVX512VL-NEXT: vpinsrd $1, %esi, %xmm0, %xmm0
; AVX512VL-NEXT: vpinsrd $2, %edx, %xmm0, %xmm0
; AVX512VL-NEXT: vpsubd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [683,1463,819,u]
; AVX512VL-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; AVX512VL-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2047,2047,2047,2047]
; AVX512VL-NEXT: vpand %xmm2, %xmm0, %xmm0
diff --git a/llvm/test/CodeGen/X86/urem-seteq-vec-nonsplat.ll b/llvm/test/CodeGen/X86/urem-seteq-vec-nonsplat.ll
index 838086e..2228c09 100644
--- a/llvm/test/CodeGen/X86/urem-seteq-vec-nonsplat.ll
+++ b/llvm/test/CodeGen/X86/urem-seteq-vec-nonsplat.ll
@@ -10,10 +10,10 @@ define <4 x i32> @test_urem_odd_even(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_urem_odd_even:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,3067833783,3264175145,3264175145]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [3067833783,u,3264175145,u]
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [2147483648,2147483648,1073741824,1073741824]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[0,2,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; CHECK-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
@@ -26,9 +26,9 @@ define <4 x i32> @test_urem_odd_even(<4 x i32> %X) nounwind {
;
; CHECK-SSE41-LABEL: test_urem_odd_even:
; CHECK-SSE41: # %bb.0:
-; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,3067833783,3264175145,3264175145]
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [2147483648,2147483648,1073741824,1073741824]
; CHECK-SSE41-NEXT: pxor %xmm2, %xmm2
; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
@@ -42,9 +42,9 @@ define <4 x i32> @test_urem_odd_even(<4 x i32> %X) nounwind {
;
; CHECK-AVX1-LABEL: test_urem_odd_even:
; CHECK-AVX1: # %bb.0:
-; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,3067833783,3264175145,3264175145]
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [2147483648,2147483648,1073741824,1073741824]
; CHECK-AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
@@ -57,7 +57,7 @@ define <4 x i32> @test_urem_odd_even(<4 x i32> %X) nounwind {
;
; CHECK-AVX2-LABEL: test_urem_odd_even:
; CHECK-AVX2: # %bb.0:
-; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,3067833783,3264175145,3264175145]
; CHECK-AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
@@ -68,7 +68,7 @@ define <4 x i32> @test_urem_odd_even(<4 x i32> %X) nounwind {
;
; CHECK-AVX512VL-LABEL: test_urem_odd_even:
; CHECK-AVX512VL: # %bb.0:
-; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,3067833783,3264175145,3264175145]
; CHECK-AVX512VL-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
@@ -87,9 +87,9 @@ define <4 x i32> @test_urem_odd_allones_eq(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_urem_odd_allones_eq:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,3435973837,4294967295,3435973837]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [3435973837,3435973837,3435973837,3435973837]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
@@ -134,7 +134,7 @@ define <4 x i32> @test_urem_odd_allones_ne(<4 x i32> %X) nounwind {
;
; CHECK-SSE41-LABEL: test_urem_odd_allones_ne:
; CHECK-SSE41: # %bb.0:
-; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,3435973837,4294967295,3435973837]
; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [858993460,858993460,2,858993460]
; CHECK-SSE41-NEXT: pmaxud %xmm0, %xmm1
; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0
@@ -143,7 +143,7 @@ define <4 x i32> @test_urem_odd_allones_ne(<4 x i32> %X) nounwind {
;
; CHECK-AVX-LABEL: test_urem_odd_allones_ne:
; CHECK-AVX: # %bb.0:
-; CHECK-AVX-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,3435973837,4294967295,3435973837]
; CHECK-AVX-NEXT: vpmaxud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX-NEXT: vpsrld $31, %xmm0, %xmm0
@@ -159,12 +159,12 @@ define <4 x i32> @test_urem_even_allones_eq(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_urem_even_allones_eq:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [3067833783,3067833783,3067833783,3067833783]
; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; CHECK-SSE2-NEXT: pmuludq %xmm2, %xmm1
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,3,2,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3067833783,3067833783,4294967295,3067833783]
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [2147483648,2147483648,1,2147483648]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,3,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
@@ -178,10 +178,10 @@ define <4 x i32> @test_urem_even_allones_eq(<4 x i32> %X) nounwind {
;
; CHECK-SSE41-LABEL: test_urem_even_allones_eq:
; CHECK-SSE41: # %bb.0:
-; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3067833783,3067833783,4294967295,3067833783]
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [2147483648,2147483648,2147483648,2147483648]
+; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [2147483648,2147483648,1,2147483648]
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
@@ -195,10 +195,10 @@ define <4 x i32> @test_urem_even_allones_eq(<4 x i32> %X) nounwind {
;
; CHECK-AVX1-LABEL: test_urem_even_allones_eq:
; CHECK-AVX1: # %bb.0:
-; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3067833783,3067833783,4294967295,3067833783]
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [2147483648,2147483648,2147483648,2147483648]
+; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [2147483648,2147483648,1,2147483648]
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
@@ -211,7 +211,7 @@ define <4 x i32> @test_urem_even_allones_eq(<4 x i32> %X) nounwind {
;
; CHECK-AVX2-LABEL: test_urem_even_allones_eq:
; CHECK-AVX2: # %bb.0:
-; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3067833783,3067833783,4294967295,3067833783]
; CHECK-AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
@@ -222,7 +222,7 @@ define <4 x i32> @test_urem_even_allones_eq(<4 x i32> %X) nounwind {
;
; CHECK-AVX512VL-LABEL: test_urem_even_allones_eq:
; CHECK-AVX512VL: # %bb.0:
-; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3067833783,3067833783,4294967295,3067833783]
; CHECK-AVX512VL-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
@@ -237,12 +237,12 @@ define <4 x i32> @test_urem_even_allones_ne(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_urem_even_allones_ne:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [3067833783,3067833783,3067833783,3067833783]
; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; CHECK-SSE2-NEXT: pmuludq %xmm2, %xmm1
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,3,2,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3067833783,3067833783,4294967295,3067833783]
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [2147483648,2147483648,1,2147483648]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,3,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
@@ -256,10 +256,10 @@ define <4 x i32> @test_urem_even_allones_ne(<4 x i32> %X) nounwind {
;
; CHECK-SSE41-LABEL: test_urem_even_allones_ne:
; CHECK-SSE41: # %bb.0:
-; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3067833783,3067833783,4294967295,3067833783]
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [2147483648,2147483648,2147483648,2147483648]
+; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [2147483648,2147483648,1,2147483648]
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
@@ -273,10 +273,10 @@ define <4 x i32> @test_urem_even_allones_ne(<4 x i32> %X) nounwind {
;
; CHECK-AVX1-LABEL: test_urem_even_allones_ne:
; CHECK-AVX1: # %bb.0:
-; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3067833783,3067833783,4294967295,3067833783]
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [2147483648,2147483648,2147483648,2147483648]
+; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [2147483648,2147483648,1,2147483648]
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
@@ -289,7 +289,7 @@ define <4 x i32> @test_urem_even_allones_ne(<4 x i32> %X) nounwind {
;
; CHECK-AVX2-LABEL: test_urem_even_allones_ne:
; CHECK-AVX2: # %bb.0:
-; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3067833783,3067833783,4294967295,3067833783]
; CHECK-AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
@@ -300,7 +300,7 @@ define <4 x i32> @test_urem_even_allones_ne(<4 x i32> %X) nounwind {
;
; CHECK-AVX512VL-LABEL: test_urem_even_allones_ne:
; CHECK-AVX512VL: # %bb.0:
-; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3067833783,3067833783,4294967295,3067833783]
; CHECK-AVX512VL-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpmaxud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
@@ -317,10 +317,10 @@ define <4 x i32> @test_urem_odd_even_allones_eq(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_urem_odd_even_allones_eq:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,3067833783,4294967295,3264175145]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [3067833783,u,3264175145,u]
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [2147483648,2147483648,1073741824,1073741824]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[0,2,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; CHECK-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
@@ -333,9 +333,9 @@ define <4 x i32> @test_urem_odd_even_allones_eq(<4 x i32> %X) nounwind {
;
; CHECK-SSE41-LABEL: test_urem_odd_even_allones_eq:
; CHECK-SSE41: # %bb.0:
-; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,3067833783,4294967295,3264175145]
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [2147483648,2147483648,1073741824,1073741824]
; CHECK-SSE41-NEXT: pxor %xmm2, %xmm2
; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
@@ -349,9 +349,9 @@ define <4 x i32> @test_urem_odd_even_allones_eq(<4 x i32> %X) nounwind {
;
; CHECK-AVX1-LABEL: test_urem_odd_even_allones_eq:
; CHECK-AVX1: # %bb.0:
-; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,3067833783,4294967295,3264175145]
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [2147483648,2147483648,1073741824,1073741824]
; CHECK-AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
@@ -364,7 +364,7 @@ define <4 x i32> @test_urem_odd_even_allones_eq(<4 x i32> %X) nounwind {
;
; CHECK-AVX2-LABEL: test_urem_odd_even_allones_eq:
; CHECK-AVX2: # %bb.0:
-; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,3067833783,4294967295,3264175145]
; CHECK-AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
@@ -375,7 +375,7 @@ define <4 x i32> @test_urem_odd_even_allones_eq(<4 x i32> %X) nounwind {
;
; CHECK-AVX512VL-LABEL: test_urem_odd_even_allones_eq:
; CHECK-AVX512VL: # %bb.0:
-; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,3067833783,4294967295,3264175145]
; CHECK-AVX512VL-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
@@ -390,10 +390,10 @@ define <4 x i32> @test_urem_odd_even_allones_ne(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_urem_odd_even_allones_ne:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,3067833783,4294967295,3264175145]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [3067833783,u,3264175145,u]
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [2147483648,2147483648,1073741824,1073741824]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[0,2,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; CHECK-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
@@ -406,9 +406,9 @@ define <4 x i32> @test_urem_odd_even_allones_ne(<4 x i32> %X) nounwind {
;
; CHECK-SSE41-LABEL: test_urem_odd_even_allones_ne:
; CHECK-SSE41: # %bb.0:
-; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,3067833783,4294967295,3264175145]
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [2147483648,2147483648,1073741824,1073741824]
; CHECK-SSE41-NEXT: pxor %xmm2, %xmm2
; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
@@ -422,9 +422,9 @@ define <4 x i32> @test_urem_odd_even_allones_ne(<4 x i32> %X) nounwind {
;
; CHECK-AVX1-LABEL: test_urem_odd_even_allones_ne:
; CHECK-AVX1: # %bb.0:
-; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,3067833783,4294967295,3264175145]
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [2147483648,2147483648,1073741824,1073741824]
; CHECK-AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
@@ -437,7 +437,7 @@ define <4 x i32> @test_urem_odd_even_allones_ne(<4 x i32> %X) nounwind {
;
; CHECK-AVX2-LABEL: test_urem_odd_even_allones_ne:
; CHECK-AVX2: # %bb.0:
-; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,3067833783,4294967295,3264175145]
; CHECK-AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
@@ -448,7 +448,7 @@ define <4 x i32> @test_urem_odd_even_allones_ne(<4 x i32> %X) nounwind {
;
; CHECK-AVX512VL-LABEL: test_urem_odd_even_allones_ne:
; CHECK-AVX512VL: # %bb.0:
-; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,3067833783,4294967295,3264175145]
; CHECK-AVX512VL-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpmaxud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
@@ -467,10 +467,10 @@ define <4 x i32> @test_urem_odd_poweroftwo(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_urem_odd_poweroftwo:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [3435973837,3435973837,3435973837,3435973837]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,3435973837,1,3435973837]
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [1,u,268435456,u]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,2,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; CHECK-SSE2-NEXT: psrlq $32, %xmm0
@@ -482,7 +482,7 @@ define <4 x i32> @test_urem_odd_poweroftwo(<4 x i32> %X) nounwind {
;
; CHECK-SSE41-LABEL: test_urem_odd_poweroftwo:
; CHECK-SSE41: # %bb.0:
-; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,3435973837,1,3435973837]
; CHECK-SSE41-NEXT: pmovsxdq {{.*#+}} xmm1 = [1,268435456]
; CHECK-SSE41-NEXT: pmuludq %xmm0, %xmm1
; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5],xmm0[6,7]
@@ -496,8 +496,8 @@ define <4 x i32> @test_urem_odd_poweroftwo(<4 x i32> %X) nounwind {
;
; CHECK-AVX1-LABEL: test_urem_odd_poweroftwo:
; CHECK-AVX1: # %bb.0:
-; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
+; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,3435973837,1,3435973837]
+; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 # [1,u,268435456,u]
; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5],xmm0[6,7]
; CHECK-AVX1-NEXT: vpsrlq $32, %xmm1, %xmm1
; CHECK-AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
@@ -508,7 +508,7 @@ define <4 x i32> @test_urem_odd_poweroftwo(<4 x i32> %X) nounwind {
;
; CHECK-AVX2-LABEL: test_urem_odd_poweroftwo:
; CHECK-AVX2: # %bb.0:
-; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,3435973837,1,3435973837]
; CHECK-AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
@@ -519,7 +519,7 @@ define <4 x i32> @test_urem_odd_poweroftwo(<4 x i32> %X) nounwind {
;
; CHECK-AVX512VL-LABEL: test_urem_odd_poweroftwo:
; CHECK-AVX512VL: # %bb.0:
-; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,3435973837,1,3435973837]
; CHECK-AVX512VL-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
@@ -536,12 +536,12 @@ define <4 x i32> @test_urem_even_poweroftwo(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_urem_even_poweroftwo:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [3067833783,3067833783,3067833783,3067833783]
; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; CHECK-SSE2-NEXT: pmuludq %xmm2, %xmm1
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,3,2,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3067833783,3067833783,1,3067833783]
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [2147483648,2147483648,268435456,2147483648]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,3,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
@@ -555,10 +555,10 @@ define <4 x i32> @test_urem_even_poweroftwo(<4 x i32> %X) nounwind {
;
; CHECK-SSE41-LABEL: test_urem_even_poweroftwo:
; CHECK-SSE41: # %bb.0:
-; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3067833783,3067833783,1,3067833783]
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [2147483648,2147483648,2147483648,2147483648]
+; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [2147483648,2147483648,268435456,2147483648]
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
@@ -572,10 +572,10 @@ define <4 x i32> @test_urem_even_poweroftwo(<4 x i32> %X) nounwind {
;
; CHECK-AVX1-LABEL: test_urem_even_poweroftwo:
; CHECK-AVX1: # %bb.0:
-; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3067833783,3067833783,1,3067833783]
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [2147483648,2147483648,2147483648,2147483648]
+; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [2147483648,2147483648,268435456,2147483648]
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
@@ -588,7 +588,7 @@ define <4 x i32> @test_urem_even_poweroftwo(<4 x i32> %X) nounwind {
;
; CHECK-AVX2-LABEL: test_urem_even_poweroftwo:
; CHECK-AVX2: # %bb.0:
-; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3067833783,3067833783,1,3067833783]
; CHECK-AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
@@ -599,7 +599,7 @@ define <4 x i32> @test_urem_even_poweroftwo(<4 x i32> %X) nounwind {
;
; CHECK-AVX512VL-LABEL: test_urem_even_poweroftwo:
; CHECK-AVX512VL: # %bb.0:
-; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3067833783,3067833783,1,3067833783]
; CHECK-AVX512VL-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
@@ -616,11 +616,11 @@ define <4 x i32> @test_urem_odd_even_poweroftwo(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_urem_odd_even_poweroftwo:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,3067833783,1,3264175145]
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [1,2147483648,268435456,1073741824]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,3,2,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [3067833783,u,3264175145,u]
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [2147483648,u,1073741824,u]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,3,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
@@ -634,10 +634,10 @@ define <4 x i32> @test_urem_odd_even_poweroftwo(<4 x i32> %X) nounwind {
;
; CHECK-SSE41-LABEL: test_urem_odd_even_poweroftwo:
; CHECK-SSE41: # %bb.0:
-; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,3067833783,1,3264175145]
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [2147483648,u,1073741824,u]
+; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [1,2147483648,268435456,1073741824]
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
@@ -651,10 +651,10 @@ define <4 x i32> @test_urem_odd_even_poweroftwo(<4 x i32> %X) nounwind {
;
; CHECK-AVX1-LABEL: test_urem_odd_even_poweroftwo:
; CHECK-AVX1: # %bb.0:
-; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,3067833783,1,3264175145]
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [2147483648,u,1073741824,u]
+; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [1,2147483648,268435456,1073741824]
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
@@ -667,7 +667,7 @@ define <4 x i32> @test_urem_odd_even_poweroftwo(<4 x i32> %X) nounwind {
;
; CHECK-AVX2-LABEL: test_urem_odd_even_poweroftwo:
; CHECK-AVX2: # %bb.0:
-; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,3067833783,1,3264175145]
; CHECK-AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
@@ -678,7 +678,7 @@ define <4 x i32> @test_urem_odd_even_poweroftwo(<4 x i32> %X) nounwind {
;
; CHECK-AVX512VL-LABEL: test_urem_odd_even_poweroftwo:
; CHECK-AVX512VL: # %bb.0:
-; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,3067833783,1,3264175145]
; CHECK-AVX512VL-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
@@ -710,7 +710,7 @@ define <4 x i32> @test_urem_odd_one(<4 x i32> %X) nounwind {
;
; CHECK-SSE41-LABEL: test_urem_odd_one:
; CHECK-SSE41: # %bb.0:
-; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,3435973837,3435973837,3435973837]
; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [858993459,858993459,4294967295,858993459]
; CHECK-SSE41-NEXT: pminud %xmm0, %xmm1
; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0
@@ -719,7 +719,7 @@ define <4 x i32> @test_urem_odd_one(<4 x i32> %X) nounwind {
;
; CHECK-AVX1-LABEL: test_urem_odd_one:
; CHECK-AVX1: # %bb.0:
-; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,3435973837,3435973837,3435973837]
; CHECK-AVX1-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0
@@ -769,7 +769,7 @@ define <4 x i32> @test_urem_even_one(<4 x i32> %X) nounwind {
;
; CHECK-SSE41-LABEL: test_urem_even_one:
; CHECK-SSE41: # %bb.0:
-; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3067833783,3067833783,3067833783,3067833783]
; CHECK-SSE41-NEXT: movdqa %xmm0, %xmm1
; CHECK-SSE41-NEXT: psrld $1, %xmm1
; CHECK-SSE41-NEXT: pslld $31, %xmm0
@@ -782,7 +782,7 @@ define <4 x i32> @test_urem_even_one(<4 x i32> %X) nounwind {
;
; CHECK-AVX1-LABEL: test_urem_even_one:
; CHECK-AVX1: # %bb.0:
-; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3067833783,3067833783,3067833783,3067833783]
; CHECK-AVX1-NEXT: vpsrld $1, %xmm0, %xmm1
; CHECK-AVX1-NEXT: vpslld $31, %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
@@ -822,10 +822,10 @@ define <4 x i32> @test_urem_odd_even_one(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_urem_odd_even_one:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,3067833783,0,3264175145]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [3067833783,u,3264175145,u]
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [2147483648,2147483648,1073741824,1073741824]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[0,2,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; CHECK-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
@@ -838,9 +838,9 @@ define <4 x i32> @test_urem_odd_even_one(<4 x i32> %X) nounwind {
;
; CHECK-SSE41-LABEL: test_urem_odd_even_one:
; CHECK-SSE41: # %bb.0:
-; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,3067833783,0,3264175145]
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [2147483648,2147483648,1073741824,1073741824]
; CHECK-SSE41-NEXT: pxor %xmm2, %xmm2
; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
@@ -854,9 +854,9 @@ define <4 x i32> @test_urem_odd_even_one(<4 x i32> %X) nounwind {
;
; CHECK-AVX1-LABEL: test_urem_odd_even_one:
; CHECK-AVX1: # %bb.0:
-; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,3067833783,0,3264175145]
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [2147483648,2147483648,1073741824,1073741824]
; CHECK-AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
@@ -869,7 +869,7 @@ define <4 x i32> @test_urem_odd_even_one(<4 x i32> %X) nounwind {
;
; CHECK-AVX2-LABEL: test_urem_odd_even_one:
; CHECK-AVX2: # %bb.0:
-; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,3067833783,0,3264175145]
; CHECK-AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
@@ -880,7 +880,7 @@ define <4 x i32> @test_urem_odd_even_one(<4 x i32> %X) nounwind {
;
; CHECK-AVX512VL-LABEL: test_urem_odd_even_one:
; CHECK-AVX512VL: # %bb.0:
-; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,3067833783,0,3264175145]
; CHECK-AVX512VL-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
@@ -899,10 +899,10 @@ define <4 x i32> @test_urem_odd_INT_MIN(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_urem_odd_INT_MIN:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [3435973837,3435973837,3435973837,3435973837]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,3435973837,1,3435973837]
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [1,u,2,u]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,2,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; CHECK-SSE2-NEXT: psrlq $32, %xmm0
@@ -914,7 +914,7 @@ define <4 x i32> @test_urem_odd_INT_MIN(<4 x i32> %X) nounwind {
;
; CHECK-SSE41-LABEL: test_urem_odd_INT_MIN:
; CHECK-SSE41: # %bb.0:
-; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,3435973837,1,3435973837]
; CHECK-SSE41-NEXT: pmovsxbq {{.*#+}} xmm1 = [1,2]
; CHECK-SSE41-NEXT: pmuludq %xmm0, %xmm1
; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5],xmm0[6,7]
@@ -928,8 +928,8 @@ define <4 x i32> @test_urem_odd_INT_MIN(<4 x i32> %X) nounwind {
;
; CHECK-AVX1-LABEL: test_urem_odd_INT_MIN:
; CHECK-AVX1: # %bb.0:
-; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
+; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,3435973837,1,3435973837]
+; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 # [1,u,2,u]
; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5],xmm0[6,7]
; CHECK-AVX1-NEXT: vpsrlq $32, %xmm1, %xmm1
; CHECK-AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
@@ -940,7 +940,7 @@ define <4 x i32> @test_urem_odd_INT_MIN(<4 x i32> %X) nounwind {
;
; CHECK-AVX2-LABEL: test_urem_odd_INT_MIN:
; CHECK-AVX2: # %bb.0:
-; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,3435973837,1,3435973837]
; CHECK-AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
@@ -951,7 +951,7 @@ define <4 x i32> @test_urem_odd_INT_MIN(<4 x i32> %X) nounwind {
;
; CHECK-AVX512VL-LABEL: test_urem_odd_INT_MIN:
; CHECK-AVX512VL: # %bb.0:
-; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,3435973837,1,3435973837]
; CHECK-AVX512VL-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
@@ -968,12 +968,12 @@ define <4 x i32> @test_urem_even_INT_MIN(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_urem_even_INT_MIN:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [3067833783,3067833783,3067833783,3067833783]
; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; CHECK-SSE2-NEXT: pmuludq %xmm2, %xmm1
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,3,2,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3067833783,3067833783,1,3067833783]
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [2147483648,2147483648,2,2147483648]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,3,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
@@ -987,10 +987,10 @@ define <4 x i32> @test_urem_even_INT_MIN(<4 x i32> %X) nounwind {
;
; CHECK-SSE41-LABEL: test_urem_even_INT_MIN:
; CHECK-SSE41: # %bb.0:
-; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3067833783,3067833783,1,3067833783]
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 #
[2147483648,2147483648,2147483648,2147483648] +; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [2147483648,2147483648,2,2147483648] ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3] ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7] ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2] @@ -1004,10 +1004,10 @@ define <4 x i32> @test_urem_even_INT_MIN(<4 x i32> %X) nounwind { ; ; CHECK-AVX1-LABEL: test_urem_even_INT_MIN: ; CHECK-AVX1: # %bb.0: -; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3067833783,3067833783,1,3067833783] ; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] -; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [2147483648,2147483648,2147483648,2147483648] +; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [2147483648,2147483648,2,2147483648] ; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,3,3] ; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7] ; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2] @@ -1020,7 +1020,7 @@ define <4 x i32> @test_urem_even_INT_MIN(<4 x i32> %X) nounwind { ; ; CHECK-AVX2-LABEL: test_urem_even_INT_MIN: ; CHECK-AVX2: # %bb.0: -; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3067833783,3067833783,1,3067833783] ; CHECK-AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 ; CHECK-AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; CHECK-AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 @@ -1031,7 +1031,7 @@ define <4 x i32> @test_urem_even_INT_MIN(<4 x i32> %X) nounwind { ; ; CHECK-AVX512VL-LABEL: test_urem_even_INT_MIN: ; CHECK-AVX512VL: # %bb.0: -; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3067833783,3067833783,1,3067833783] ; CHECK-AVX512VL-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 ; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 @@ -1048,11 +1048,11 @@ define <4 x i32> @test_urem_odd_even_INT_MIN(<4 x i32> %X) nounwind { ; CHECK-SSE2-LABEL: test_urem_odd_even_INT_MIN: ; CHECK-SSE2: # %bb.0: ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] -; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,3067833783,1,3264175145] +; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [1,2147483648,2,1073741824] ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,3,2,3] -; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 -; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [3067833783,u,3264175145,u] +; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [2147483648,u,1073741824,u] ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,3,2,3] ; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] @@ 
-1066,10 +1066,10 @@ define <4 x i32> @test_urem_odd_even_INT_MIN(<4 x i32> %X) nounwind { ; ; CHECK-SSE41-LABEL: test_urem_odd_even_INT_MIN: ; CHECK-SSE41: # %bb.0: -; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,3067833783,1,3264175145] ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] -; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 -; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [2147483648,u,1073741824,u] +; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [1,2147483648,2,1073741824] ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3] ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7] ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2] @@ -1083,10 +1083,10 @@ define <4 x i32> @test_urem_odd_even_INT_MIN(<4 x i32> %X) nounwind { ; ; CHECK-AVX1-LABEL: test_urem_odd_even_INT_MIN: ; CHECK-AVX1: # %bb.0: -; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,3067833783,1,3264175145] ; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] -; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [2147483648,u,1073741824,u] +; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [1,2147483648,2,1073741824] ; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,3,3] ; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7] ; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2] @@ -1099,7 +1099,7 @@ define <4 x i32> @test_urem_odd_even_INT_MIN(<4 x i32> %X) nounwind { ; ; CHECK-AVX2-LABEL: test_urem_odd_even_INT_MIN: ; CHECK-AVX2: # %bb.0: -; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,3067833783,1,3264175145] ; CHECK-AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 ; CHECK-AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; CHECK-AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 @@ -1110,7 +1110,7 @@ define <4 x i32> @test_urem_odd_even_INT_MIN(<4 x i32> %X) nounwind { ; ; CHECK-AVX512VL-LABEL: test_urem_odd_even_INT_MIN: ; CHECK-AVX512VL: # %bb.0: -; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,3067833783,1,3264175145] ; CHECK-AVX512VL-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 ; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 @@ -1129,10 +1129,10 @@ define <4 x i32> @test_urem_odd_allones_and_poweroftwo(<4 x i32> %X) nounwind { ; CHECK-SSE2-LABEL: test_urem_odd_allones_and_poweroftwo: ; CHECK-SSE2: # %bb.0: ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] -; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [4294967295,u,3435973837,u] ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] -; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; CHECK-SSE2-NEXT: pmuludq 
{{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,4294967295,1,3435973837] +; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [1,u,268435456,u] ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,2,2,3] ; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] ; CHECK-SSE2-NEXT: psrlq $32, %xmm0 @@ -1144,7 +1144,7 @@ define <4 x i32> @test_urem_odd_allones_and_poweroftwo(<4 x i32> %X) nounwind { ; ; CHECK-SSE41-LABEL: test_urem_odd_allones_and_poweroftwo: ; CHECK-SSE41: # %bb.0: -; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,4294967295,1,3435973837] ; CHECK-SSE41-NEXT: pmovsxdq {{.*#+}} xmm1 = [1,268435456] ; CHECK-SSE41-NEXT: pmuludq %xmm0, %xmm1 ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5],xmm0[6,7] @@ -1158,8 +1158,8 @@ define <4 x i32> @test_urem_odd_allones_and_poweroftwo(<4 x i32> %X) nounwind { ; ; CHECK-AVX1-LABEL: test_urem_odd_allones_and_poweroftwo: ; CHECK-AVX1: # %bb.0: -; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 +; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,4294967295,1,3435973837] +; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 # [1,u,268435456,u] ; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5],xmm0[6,7] ; CHECK-AVX1-NEXT: vpsrlq $32, %xmm1, %xmm1 ; CHECK-AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 @@ -1170,7 +1170,7 @@ define <4 x i32> @test_urem_odd_allones_and_poweroftwo(<4 x i32> %X) nounwind { ; ; CHECK-AVX2-LABEL: test_urem_odd_allones_and_poweroftwo: ; CHECK-AVX2: # %bb.0: -; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,4294967295,1,3435973837] ; CHECK-AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 ; CHECK-AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; CHECK-AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 @@ -1181,7 +1181,7 @@ define <4 x i32> @test_urem_odd_allones_and_poweroftwo(<4 x i32> %X) nounwind { ; ; CHECK-AVX512VL-LABEL: test_urem_odd_allones_and_poweroftwo: ; CHECK-AVX512VL: # %bb.0: -; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,4294967295,1,3435973837] ; CHECK-AVX512VL-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 ; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 @@ -1198,11 +1198,11 @@ define <4 x i32> @test_urem_even_allones_and_poweroftwo(<4 x i32> %X) nounwind { ; CHECK-SSE2-LABEL: test_urem_even_allones_and_poweroftwo: ; CHECK-SSE2: # %bb.0: ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] -; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3067833783,4294967295,1,3067833783] +; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [2147483648,1,268435456,2147483648] ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,3,2,3] -; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 -; CHECK-SSE2-NEXT: pmuludq 
{{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [4294967295,u,3067833783,u] +; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [1,u,2147483648,u] ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,3,2,3] ; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] @@ -1216,10 +1216,10 @@ define <4 x i32> @test_urem_even_allones_and_poweroftwo(<4 x i32> %X) nounwind { ; ; CHECK-SSE41-LABEL: test_urem_even_allones_and_poweroftwo: ; CHECK-SSE41: # %bb.0: -; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3067833783,4294967295,1,3067833783] ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] -; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 -; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [1,u,2147483648,u] +; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [2147483648,1,268435456,2147483648] ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3] ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7] ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2] @@ -1233,10 +1233,10 @@ define <4 x i32> @test_urem_even_allones_and_poweroftwo(<4 x i32> %X) nounwind { ; ; CHECK-AVX1-LABEL: test_urem_even_allones_and_poweroftwo: ; CHECK-AVX1: # %bb.0: -; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3067833783,4294967295,1,3067833783] ; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] -; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [1,u,2147483648,u] +; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [2147483648,1,268435456,2147483648] ; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,3,3] ; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7] ; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2] @@ -1249,7 +1249,7 @@ define <4 x i32> @test_urem_even_allones_and_poweroftwo(<4 x i32> %X) nounwind { ; ; CHECK-AVX2-LABEL: test_urem_even_allones_and_poweroftwo: ; CHECK-AVX2: # %bb.0: -; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3067833783,4294967295,1,3067833783] ; CHECK-AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 ; CHECK-AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; CHECK-AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 @@ -1260,7 +1260,7 @@ define <4 x i32> @test_urem_even_allones_and_poweroftwo(<4 x i32> %X) nounwind { ; ; CHECK-AVX512VL-LABEL: test_urem_even_allones_and_poweroftwo: ; CHECK-AVX512VL: # %bb.0: -; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3067833783,4294967295,1,3067833783] ; CHECK-AVX512VL-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 ; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 @@ -1277,11 +1277,11 @@ define <4 x i32> 
@test_urem_odd_even_allones_and_poweroftwo(<4 x i32> %X) nounwi ; CHECK-SSE2-LABEL: test_urem_odd_even_allones_and_poweroftwo: ; CHECK-SSE2: # %bb.0: ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] -; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,4294967295,1,3264175145] +; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [1,1,268435456,1073741824] ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,3,2,3] -; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 -; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [4294967295,u,3264175145,u] +; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [1,u,1073741824,u] ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,3,2,3] ; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] @@ -1295,10 +1295,10 @@ define <4 x i32> @test_urem_odd_even_allones_and_poweroftwo(<4 x i32> %X) nounwi ; ; CHECK-SSE41-LABEL: test_urem_odd_even_allones_and_poweroftwo: ; CHECK-SSE41: # %bb.0: -; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,4294967295,1,3264175145] ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] -; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 -; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [1,u,1073741824,u] +; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [1,1,268435456,1073741824] ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3] ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7] ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2] @@ -1312,10 +1312,10 @@ define <4 x i32> @test_urem_odd_even_allones_and_poweroftwo(<4 x i32> %X) nounwi ; ; CHECK-AVX1-LABEL: test_urem_odd_even_allones_and_poweroftwo: ; CHECK-AVX1: # %bb.0: -; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,4294967295,1,3264175145] ; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] -; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [1,u,1073741824,u] +; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [1,1,268435456,1073741824] ; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,3,3] ; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7] ; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2] @@ -1328,7 +1328,7 @@ define <4 x i32> @test_urem_odd_even_allones_and_poweroftwo(<4 x i32> %X) nounwi ; ; CHECK-AVX2-LABEL: test_urem_odd_even_allones_and_poweroftwo: ; CHECK-AVX2: # %bb.0: -; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,4294967295,1,3264175145] ; CHECK-AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 ; CHECK-AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; CHECK-AVX2-NEXT: 
vpor %xmm1, %xmm0, %xmm0 @@ -1339,7 +1339,7 @@ define <4 x i32> @test_urem_odd_even_allones_and_poweroftwo(<4 x i32> %X) nounwi ; ; CHECK-AVX512VL-LABEL: test_urem_odd_even_allones_and_poweroftwo: ; CHECK-AVX512VL: # %bb.0: -; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,4294967295,1,3264175145] ; CHECK-AVX512VL-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 ; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 @@ -1358,9 +1358,9 @@ define <4 x i32> @test_urem_odd_allones_and_one(<4 x i32> %X) nounwind { ; CHECK-SSE2-LABEL: test_urem_odd_allones_and_one: ; CHECK-SSE2: # %bb.0: ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] -; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,4294967295,0,3435973837] ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [4294967295,u,3435973837,u] ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] ; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; CHECK-SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 @@ -1370,7 +1370,7 @@ define <4 x i32> @test_urem_odd_allones_and_one(<4 x i32> %X) nounwind { ; ; CHECK-SSE41-LABEL: test_urem_odd_allones_and_one: ; CHECK-SSE41: # %bb.0: -; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,4294967295,0,3435973837] ; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [858993459,1,4294967295,858993459] ; CHECK-SSE41-NEXT: pminud %xmm0, %xmm1 ; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0 @@ -1379,7 +1379,7 @@ define <4 x i32> @test_urem_odd_allones_and_one(<4 x i32> %X) nounwind { ; ; CHECK-AVX-LABEL: test_urem_odd_allones_and_one: ; CHECK-AVX: # %bb.0: -; CHECK-AVX-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; CHECK-AVX-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,4294967295,0,3435973837] ; CHECK-AVX-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 ; CHECK-AVX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 ; CHECK-AVX-NEXT: vpsrld $31, %xmm0, %xmm0 @@ -1395,11 +1395,11 @@ define <4 x i32> @test_urem_even_allones_and_one(<4 x i32> %X) nounwind { ; CHECK-SSE2-LABEL: test_urem_even_allones_and_one: ; CHECK-SSE2: # %bb.0: ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] -; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3067833783,4294967295,0,3067833783] +; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [2147483648,1,1,2147483648] ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,3,2,3] -; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 -; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [4294967295,u,3067833783,u] +; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [1,u,2147483648,u] ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,3,2,3] ; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = 
xmm0[0,2,2,3] @@ -1413,10 +1413,10 @@ define <4 x i32> @test_urem_even_allones_and_one(<4 x i32> %X) nounwind { ; ; CHECK-SSE41-LABEL: test_urem_even_allones_and_one: ; CHECK-SSE41: # %bb.0: -; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3067833783,4294967295,0,3067833783] ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] -; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 -; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [1,u,2147483648,u] +; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [2147483648,1,1,2147483648] ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3] ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7] ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2] @@ -1430,10 +1430,10 @@ define <4 x i32> @test_urem_even_allones_and_one(<4 x i32> %X) nounwind { ; ; CHECK-AVX1-LABEL: test_urem_even_allones_and_one: ; CHECK-AVX1: # %bb.0: -; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3067833783,4294967295,0,3067833783] ; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] -; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [1,u,2147483648,u] +; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [2147483648,1,1,2147483648] ; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,3,3] ; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7] ; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2] @@ -1446,7 +1446,7 @@ define <4 x i32> @test_urem_even_allones_and_one(<4 x i32> %X) nounwind { ; ; CHECK-AVX2-LABEL: test_urem_even_allones_and_one: ; CHECK-AVX2: # %bb.0: -; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3067833783,4294967295,0,3067833783] ; CHECK-AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 ; CHECK-AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; CHECK-AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 @@ -1457,7 +1457,7 @@ define <4 x i32> @test_urem_even_allones_and_one(<4 x i32> %X) nounwind { ; ; CHECK-AVX512VL-LABEL: test_urem_even_allones_and_one: ; CHECK-AVX512VL: # %bb.0: -; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3067833783,4294967295,0,3067833783] ; CHECK-AVX512VL-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 ; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 @@ -1474,10 +1474,10 @@ define <4 x i32> @test_urem_odd_even_allones_and_one(<4 x i32> %X) nounwind { ; CHECK-SSE2-LABEL: test_urem_odd_even_allones_and_one: ; CHECK-SSE2: # %bb.0: ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] -; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,4294967295,0,3264175145] ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 -; 
CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [4294967295,u,3264175145,u] +; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [1,1,1073741824,1073741824] ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[0,2,2,3] ; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] ; CHECK-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 @@ -1490,9 +1490,9 @@ define <4 x i32> @test_urem_odd_even_allones_and_one(<4 x i32> %X) nounwind { ; ; CHECK-SSE41-LABEL: test_urem_odd_even_allones_and_one: ; CHECK-SSE41: # %bb.0: -; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,4294967295,0,3264175145] ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] -; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [1,1,1073741824,1073741824] ; CHECK-SSE41-NEXT: pxor %xmm2, %xmm2 ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7] ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2] @@ -1506,9 +1506,9 @@ define <4 x i32> @test_urem_odd_even_allones_and_one(<4 x i32> %X) nounwind { ; ; CHECK-AVX1-LABEL: test_urem_odd_even_allones_and_one: ; CHECK-AVX1: # %bb.0: -; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,4294967295,0,3264175145] ; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] -; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [1,1,1073741824,1073741824] ; CHECK-AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7] ; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2] @@ -1521,7 +1521,7 @@ define <4 x i32> @test_urem_odd_even_allones_and_one(<4 x i32> %X) nounwind { ; ; CHECK-AVX2-LABEL: test_urem_odd_even_allones_and_one: ; CHECK-AVX2: # %bb.0: -; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,4294967295,0,3264175145] ; CHECK-AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 ; CHECK-AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; CHECK-AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 @@ -1532,7 +1532,7 @@ define <4 x i32> @test_urem_odd_even_allones_and_one(<4 x i32> %X) nounwind { ; ; CHECK-AVX512VL-LABEL: test_urem_odd_even_allones_and_one: ; CHECK-AVX512VL: # %bb.0: -; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,4294967295,0,3264175145] ; CHECK-AVX512VL-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 ; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 @@ -1551,10 +1551,10 @@ define <4 x i32> @test_urem_odd_poweroftwo_and_one(<4 x i32> %X) nounwind { ; CHECK-SSE2-LABEL: test_urem_odd_poweroftwo_and_one: ; CHECK-SSE2: # %bb.0: ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] -; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,1,0,3435973837] ; CHECK-SSE2-NEXT: pshufd 
{{.*#+}} xmm0 = xmm0[0,2,2,3] -; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 -; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [1,u,3435973837,u] +; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [268435456,268435456,1,1] ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[0,2,2,3] ; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] ; CHECK-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 @@ -1567,9 +1567,9 @@ define <4 x i32> @test_urem_odd_poweroftwo_and_one(<4 x i32> %X) nounwind { ; ; CHECK-SSE41-LABEL: test_urem_odd_poweroftwo_and_one: ; CHECK-SSE41: # %bb.0: -; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,1,0,3435973837] ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] -; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [268435456,268435456,1,1] ; CHECK-SSE41-NEXT: pxor %xmm2, %xmm2 ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7] ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2] @@ -1583,9 +1583,9 @@ define <4 x i32> @test_urem_odd_poweroftwo_and_one(<4 x i32> %X) nounwind { ; ; CHECK-AVX1-LABEL: test_urem_odd_poweroftwo_and_one: ; CHECK-AVX1: # %bb.0: -; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,1,0,3435973837] ; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] -; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [268435456,268435456,1,1] ; CHECK-AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7] ; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2] @@ -1598,7 +1598,7 @@ define <4 x i32> @test_urem_odd_poweroftwo_and_one(<4 x i32> %X) nounwind { ; ; CHECK-AVX2-LABEL: test_urem_odd_poweroftwo_and_one: ; CHECK-AVX2: # %bb.0: -; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,1,0,3435973837] ; CHECK-AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 ; CHECK-AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; CHECK-AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 @@ -1609,7 +1609,7 @@ define <4 x i32> @test_urem_odd_poweroftwo_and_one(<4 x i32> %X) nounwind { ; ; CHECK-AVX512VL-LABEL: test_urem_odd_poweroftwo_and_one: ; CHECK-AVX512VL: # %bb.0: -; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,1,0,3435973837] ; CHECK-AVX512VL-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 ; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 @@ -1626,11 +1626,11 @@ define <4 x i32> @test_urem_even_poweroftwo_and_one(<4 x i32> %X) nounwind { ; CHECK-SSE2-LABEL: test_urem_even_poweroftwo_and_one: ; CHECK-SSE2: # %bb.0: ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] -; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; CHECK-SSE2-NEXT: 
pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3067833783,1,0,3067833783] +; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [2147483648,268435456,1,2147483648] ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,3,2,3] -; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 -; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [1,u,3067833783,u] +; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [268435456,u,2147483648,u] ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,3,2,3] ; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] @@ -1644,10 +1644,10 @@ define <4 x i32> @test_urem_even_poweroftwo_and_one(<4 x i32> %X) nounwind { ; ; CHECK-SSE41-LABEL: test_urem_even_poweroftwo_and_one: ; CHECK-SSE41: # %bb.0: -; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3067833783,1,0,3067833783] ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] -; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 -; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [268435456,u,2147483648,u] +; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [2147483648,268435456,1,2147483648] ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3] ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7] ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2] @@ -1661,10 +1661,10 @@ define <4 x i32> @test_urem_even_poweroftwo_and_one(<4 x i32> %X) nounwind { ; ; CHECK-AVX1-LABEL: test_urem_even_poweroftwo_and_one: ; CHECK-AVX1: # %bb.0: -; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3067833783,1,0,3067833783] ; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] -; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [268435456,u,2147483648,u] +; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [2147483648,268435456,1,2147483648] ; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,3,3] ; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7] ; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2] @@ -1677,7 +1677,7 @@ define <4 x i32> @test_urem_even_poweroftwo_and_one(<4 x i32> %X) nounwind { ; ; CHECK-AVX2-LABEL: test_urem_even_poweroftwo_and_one: ; CHECK-AVX2: # %bb.0: -; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3067833783,1,0,3067833783] ; CHECK-AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 ; CHECK-AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; CHECK-AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 @@ -1688,7 +1688,7 @@ define <4 x i32> @test_urem_even_poweroftwo_and_one(<4 x i32> %X) nounwind { ; ; CHECK-AVX512VL-LABEL: test_urem_even_poweroftwo_and_one: ; CHECK-AVX512VL: # %bb.0: -; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # 
[3067833783,1,0,3067833783] ; CHECK-AVX512VL-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 ; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 @@ -1705,10 +1705,10 @@ define <4 x i32> @test_urem_odd_even_poweroftwo_and_one(<4 x i32> %X) nounwind { ; CHECK-SSE2-LABEL: test_urem_odd_even_poweroftwo_and_one: ; CHECK-SSE2: # %bb.0: ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] -; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,1,0,3264175145] ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 -; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [1,u,3264175145,u] +; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [268435456,268435456,1073741824,1073741824] ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[0,2,2,3] ; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] ; CHECK-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 @@ -1721,9 +1721,9 @@ define <4 x i32> @test_urem_odd_even_poweroftwo_and_one(<4 x i32> %X) nounwind { ; ; CHECK-SSE41-LABEL: test_urem_odd_even_poweroftwo_and_one: ; CHECK-SSE41: # %bb.0: -; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,1,0,3264175145] ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] -; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [268435456,268435456,1073741824,1073741824] ; CHECK-SSE41-NEXT: pxor %xmm2, %xmm2 ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7] ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2] @@ -1737,9 +1737,9 @@ define <4 x i32> @test_urem_odd_even_poweroftwo_and_one(<4 x i32> %X) nounwind { ; ; CHECK-AVX1-LABEL: test_urem_odd_even_poweroftwo_and_one: ; CHECK-AVX1: # %bb.0: -; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,1,0,3264175145] ; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] -; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [268435456,268435456,1073741824,1073741824] ; CHECK-AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7] ; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2] @@ -1752,7 +1752,7 @@ define <4 x i32> @test_urem_odd_even_poweroftwo_and_one(<4 x i32> %X) nounwind { ; ; CHECK-AVX2-LABEL: test_urem_odd_even_poweroftwo_and_one: ; CHECK-AVX2: # %bb.0: -; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,1,0,3264175145] ; CHECK-AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 ; CHECK-AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; CHECK-AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 @@ -1763,7 +1763,7 @@ define <4 x i32> @test_urem_odd_even_poweroftwo_and_one(<4 x i32> %X) nounwind { ; ; CHECK-AVX512VL-LABEL: test_urem_odd_even_poweroftwo_and_one: ; CHECK-AVX512VL: # %bb.0: -; 
CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,1,0,3264175145] ; CHECK-AVX512VL-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 ; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 @@ -1781,10 +1781,10 @@ define <4 x i32> @test_urem_odd_allones_and_poweroftwo_and_one(<4 x i32> %X) nou ; CHECK-SSE2-LABEL: test_urem_odd_allones_and_poweroftwo_and_one: ; CHECK-SSE2: # %bb.0: ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] -; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [4294967295,0,0,0] ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] -; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,4294967295,1,0] +; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [1,u,268435456,u] ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,2,2,3] ; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] ; CHECK-SSE2-NEXT: psrlq $32, %xmm0 @@ -1796,7 +1796,7 @@ define <4 x i32> @test_urem_odd_allones_and_poweroftwo_and_one(<4 x i32> %X) nou ; ; CHECK-SSE41-LABEL: test_urem_odd_allones_and_poweroftwo_and_one: ; CHECK-SSE41: # %bb.0: -; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,4294967295,1,0] ; CHECK-SSE41-NEXT: pmovsxdq {{.*#+}} xmm1 = [1,268435456] ; CHECK-SSE41-NEXT: pmuludq %xmm0, %xmm1 ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5],xmm0[6,7] @@ -1810,8 +1810,8 @@ define <4 x i32> @test_urem_odd_allones_and_poweroftwo_and_one(<4 x i32> %X) nou ; ; CHECK-AVX1-LABEL: test_urem_odd_allones_and_poweroftwo_and_one: ; CHECK-AVX1: # %bb.0: -; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 +; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,4294967295,1,0] +; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 # [1,u,268435456,u] ; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5],xmm0[6,7] ; CHECK-AVX1-NEXT: vpsrlq $32, %xmm1, %xmm1 ; CHECK-AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 @@ -1822,7 +1822,7 @@ define <4 x i32> @test_urem_odd_allones_and_poweroftwo_and_one(<4 x i32> %X) nou ; ; CHECK-AVX2-LABEL: test_urem_odd_allones_and_poweroftwo_and_one: ; CHECK-AVX2: # %bb.0: -; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,4294967295,1,0] ; CHECK-AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 ; CHECK-AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; CHECK-AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 @@ -1833,7 +1833,7 @@ define <4 x i32> @test_urem_odd_allones_and_poweroftwo_and_one(<4 x i32> %X) nou ; ; CHECK-AVX512VL-LABEL: test_urem_odd_allones_and_poweroftwo_and_one: ; CHECK-AVX512VL: # %bb.0: -; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,4294967295,1,0] ; CHECK-AVX512VL-NEXT: vprorvd 
{{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 ; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 @@ -1849,10 +1849,10 @@ define <4 x i32> @test_urem_even_allones_and_poweroftwo_and_one(<4 x i32> %X) no ; CHECK-SSE2-LABEL: test_urem_even_allones_and_poweroftwo_and_one: ; CHECK-SSE2: # %bb.0: ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] -; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [4294967295,0,0,0] ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] -; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3067833783,4294967295,1,0] +; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [2147483648,u,268435456,u] ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,2,2,3] ; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] ; CHECK-SSE2-NEXT: psrlq $32, %xmm0 @@ -1864,7 +1864,7 @@ define <4 x i32> @test_urem_even_allones_and_poweroftwo_and_one(<4 x i32> %X) no ; ; CHECK-SSE41-LABEL: test_urem_even_allones_and_poweroftwo_and_one: ; CHECK-SSE41: # %bb.0: -; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3067833783,4294967295,1,0] ; CHECK-SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = [2147483648,268435456] ; CHECK-SSE41-NEXT: pmuludq %xmm0, %xmm1 ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5],xmm0[6,7] @@ -1878,8 +1878,8 @@ define <4 x i32> @test_urem_even_allones_and_poweroftwo_and_one(<4 x i32> %X) no ; ; CHECK-AVX1-LABEL: test_urem_even_allones_and_poweroftwo_and_one: ; CHECK-AVX1: # %bb.0: -; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 +; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3067833783,4294967295,1,0] +; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 # [2147483648,u,268435456,u] ; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5],xmm0[6,7] ; CHECK-AVX1-NEXT: vpsrlq $32, %xmm1, %xmm1 ; CHECK-AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 @@ -1890,7 +1890,7 @@ define <4 x i32> @test_urem_even_allones_and_poweroftwo_and_one(<4 x i32> %X) no ; ; CHECK-AVX2-LABEL: test_urem_even_allones_and_poweroftwo_and_one: ; CHECK-AVX2: # %bb.0: -; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3067833783,4294967295,1,0] ; CHECK-AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 ; CHECK-AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; CHECK-AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 @@ -1901,7 +1901,7 @@ define <4 x i32> @test_urem_even_allones_and_poweroftwo_and_one(<4 x i32> %X) no ; ; CHECK-AVX512VL-LABEL: test_urem_even_allones_and_poweroftwo_and_one: ; CHECK-AVX512VL: # %bb.0: -; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3067833783,4294967295,1,0] ; CHECK-AVX512VL-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 ; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 diff 
--git a/llvm/test/CodeGen/X86/urem-seteq-vec-nonzero.ll b/llvm/test/CodeGen/X86/urem-seteq-vec-nonzero.ll index 6a36cd2..8042103 100644 --- a/llvm/test/CodeGen/X86/urem-seteq-vec-nonzero.ll +++ b/llvm/test/CodeGen/X86/urem-seteq-vec-nonzero.ll @@ -25,7 +25,7 @@ define <4 x i1> @t32_3(<4 x i32> %X) nounwind { ; CHECK-SSE41-LABEL: t32_3: ; CHECK-SSE41: # %bb.0: ; CHECK-SSE41-NEXT: psubd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [2863311531,2863311531,2863311531,2863311531] ; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1431655765,1431655764,1431655764,1431655764] ; CHECK-SSE41-NEXT: pminud %xmm0, %xmm1 ; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0 @@ -34,7 +34,7 @@ define <4 x i1> @t32_3(<4 x i32> %X) nounwind { ; CHECK-AVX1-LABEL: t32_3: ; CHECK-AVX1: # %bb.0: ; CHECK-AVX1-NEXT: vpsubd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [2863311531,2863311531,2863311531,2863311531] ; CHECK-AVX1-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 ; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 ; CHECK-AVX1-NEXT: retq @@ -80,7 +80,7 @@ define <4 x i1> @t32_5(<4 x i32> %X) nounwind { ; CHECK-SSE41-LABEL: t32_5: ; CHECK-SSE41: # %bb.0: ; CHECK-SSE41-NEXT: psubd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,3435973837,3435973837,3435973837] ; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [858993458,858993458,858993458,858993458] ; CHECK-SSE41-NEXT: pminud %xmm0, %xmm1 ; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0 @@ -89,7 +89,7 @@ define <4 x i1> @t32_5(<4 x i32> %X) nounwind { ; CHECK-AVX1-LABEL: t32_5: ; CHECK-AVX1: # %bb.0: ; CHECK-AVX1-NEXT: vpsubd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,3435973837,3435973837,3435973837] ; CHECK-AVX1-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 ; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 ; CHECK-AVX1-NEXT: retq @@ -140,7 +140,7 @@ define <4 x i1> @t32_6_part0(<4 x i32> %X) nounwind { ; CHECK-SSE41-LABEL: t32_6_part0: ; CHECK-SSE41: # %bb.0: ; CHECK-SSE41-NEXT: psubd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [2863311531,2863311531,2863311531,2863311531] ; CHECK-SSE41-NEXT: movdqa %xmm0, %xmm1 ; CHECK-SSE41-NEXT: psrld $1, %xmm1 ; CHECK-SSE41-NEXT: pslld $31, %xmm0 @@ -153,7 +153,7 @@ define <4 x i1> @t32_6_part0(<4 x i32> %X) nounwind { ; CHECK-AVX1-LABEL: t32_6_part0: ; CHECK-AVX1: # %bb.0: ; CHECK-AVX1-NEXT: vpsubd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [2863311531,2863311531,2863311531,2863311531] ; CHECK-AVX1-NEXT: vpsrld $1, %xmm0, %xmm1 ; CHECK-AVX1-NEXT: vpslld $31, %xmm0, %xmm0 ; CHECK-AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 @@ -211,7 +211,7 @@ define <4 x i1> @t32_6_part1(<4 x i32> %X) nounwind { ; CHECK-SSE41-LABEL: t32_6_part1: ; CHECK-SSE41: # %bb.0: ; CHECK-SSE41-NEXT: psubd 
{{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [2863311531,2863311531,2863311531,2863311531] ; CHECK-SSE41-NEXT: movdqa %xmm0, %xmm1 ; CHECK-SSE41-NEXT: psrld $1, %xmm1 ; CHECK-SSE41-NEXT: pslld $31, %xmm0 @@ -224,7 +224,7 @@ define <4 x i1> @t32_6_part1(<4 x i32> %X) nounwind { ; CHECK-AVX1-LABEL: t32_6_part1: ; CHECK-AVX1: # %bb.0: ; CHECK-AVX1-NEXT: vpsubd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [2863311531,2863311531,2863311531,2863311531] ; CHECK-AVX1-NEXT: vpsrld $1, %xmm0, %xmm1 ; CHECK-AVX1-NEXT: vpslld $31, %xmm0, %xmm0 ; CHECK-AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 @@ -275,7 +275,7 @@ define <4 x i1> @t32_tautological(<4 x i32> %X) nounwind { ; CHECK-SSE41-LABEL: t32_tautological: ; CHECK-SSE41: # %bb.0: ; CHECK-SSE41-NEXT: psubd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [2863311531,2863311531,2863311531,2863311531] ; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [4294967295,4294967295,4294967295,1431655764] ; CHECK-SSE41-NEXT: pminud %xmm0, %xmm1 ; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0 @@ -286,7 +286,7 @@ define <4 x i1> @t32_tautological(<4 x i32> %X) nounwind { ; CHECK-AVX1-LABEL: t32_tautological: ; CHECK-AVX1: # %bb.0: ; CHECK-AVX1-NEXT: vpsubd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [2863311531,2863311531,2863311531,2863311531] ; CHECK-AVX1-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 ; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 ; CHECK-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 diff --git a/llvm/test/CodeGen/X86/urem-seteq-vec-splat.ll b/llvm/test/CodeGen/X86/urem-seteq-vec-splat.ll index 2166e43..b490c3c 100644 --- a/llvm/test/CodeGen/X86/urem-seteq-vec-splat.ll +++ b/llvm/test/CodeGen/X86/urem-seteq-vec-splat.ll @@ -23,7 +23,7 @@ define <4 x i32> @test_urem_odd_25(<4 x i32> %X) nounwind { ; ; CHECK-SSE41-LABEL: test_urem_odd_25: ; CHECK-SSE41: # %bb.0: -; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3264175145,3264175145,3264175145,3264175145] ; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [171798691,171798691,171798691,171798691] ; CHECK-SSE41-NEXT: pminud %xmm0, %xmm1 ; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0 @@ -32,7 +32,7 @@ define <4 x i32> @test_urem_odd_25(<4 x i32> %X) nounwind { ; ; CHECK-AVX1-LABEL: test_urem_odd_25: ; CHECK-AVX1: # %bb.0: -; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3264175145,3264175145,3264175145,3264175145] ; CHECK-AVX1-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 ; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 ; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0 @@ -83,7 +83,7 @@ define <4 x i32> @test_urem_even_100(<4 x i32> %X) nounwind { ; ; CHECK-SSE41-LABEL: test_urem_even_100: ; CHECK-SSE41: # %bb.0: -; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3264175145,3264175145,3264175145,3264175145] ; 
CHECK-SSE41-NEXT: movdqa %xmm0, %xmm1 ; CHECK-SSE41-NEXT: psrld $2, %xmm1 ; CHECK-SSE41-NEXT: pslld $30, %xmm0 @@ -96,7 +96,7 @@ define <4 x i32> @test_urem_even_100(<4 x i32> %X) nounwind { ; ; CHECK-AVX1-LABEL: test_urem_even_100: ; CHECK-AVX1: # %bb.0: -; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3264175145,3264175145,3264175145,3264175145] ; CHECK-AVX1-NEXT: vpsrld $2, %xmm0, %xmm1 ; CHECK-AVX1-NEXT: vpslld $30, %xmm0, %xmm0 ; CHECK-AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 @@ -139,9 +139,9 @@ define <4 x i32> @test_urem_odd_neg25(<4 x i32> %X) nounwind { ; CHECK-SSE2-LABEL: test_urem_odd_neg25: ; CHECK-SSE2: # %bb.0: ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] -; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3264175145,1030792151,1030792151,3264175145] ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [1030792151,u,3264175145,u] ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] ; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; CHECK-SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 @@ -151,7 +151,7 @@ define <4 x i32> @test_urem_odd_neg25(<4 x i32> %X) nounwind { ; ; CHECK-SSE41-LABEL: test_urem_odd_neg25: ; CHECK-SSE41: # %bb.0: -; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3264175145,1030792151,1030792151,3264175145] ; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [171798691,1,1,171798691] ; CHECK-SSE41-NEXT: pminud %xmm0, %xmm1 ; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0 @@ -160,7 +160,7 @@ define <4 x i32> @test_urem_odd_neg25(<4 x i32> %X) nounwind { ; ; CHECK-AVX-LABEL: test_urem_odd_neg25: ; CHECK-AVX: # %bb.0: -; CHECK-AVX-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; CHECK-AVX-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3264175145,1030792151,1030792151,3264175145] ; CHECK-AVX-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 ; CHECK-AVX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 ; CHECK-AVX-NEXT: vpsrld $31, %xmm0, %xmm0 @@ -176,9 +176,9 @@ define <4 x i32> @test_urem_even_neg100(<4 x i32> %X) nounwind { ; CHECK-SSE2-LABEL: test_urem_even_neg100: ; CHECK-SSE2: # %bb.0: ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] -; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [4252017623,3264175145,4252017623,3264175145] ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [3264175145,3264175145,3264175145,3264175145] ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] ; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm1 @@ -192,7 +192,7 @@ define <4 x i32> @test_urem_even_neg100(<4 x i32> %X) nounwind { ; ; CHECK-SSE41-LABEL: test_urem_even_neg100: ; CHECK-SSE41: # %bb.0: -; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [4252017623,3264175145,4252017623,3264175145] ; CHECK-SSE41-NEXT: movdqa %xmm0, %xmm1 ; CHECK-SSE41-NEXT: psrld 
$2, %xmm1 ; CHECK-SSE41-NEXT: pslld $30, %xmm0 @@ -205,7 +205,7 @@ define <4 x i32> @test_urem_even_neg100(<4 x i32> %X) nounwind { ; ; CHECK-AVX1-LABEL: test_urem_even_neg100: ; CHECK-AVX1: # %bb.0: -; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [4252017623,3264175145,4252017623,3264175145] ; CHECK-AVX1-NEXT: vpsrld $2, %xmm0, %xmm1 ; CHECK-AVX1-NEXT: vpslld $30, %xmm0, %xmm0 ; CHECK-AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 @@ -216,7 +216,7 @@ define <4 x i32> @test_urem_even_neg100(<4 x i32> %X) nounwind { ; ; CHECK-AVX2-LABEL: test_urem_even_neg100: ; CHECK-AVX2: # %bb.0: -; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [4252017623,3264175145,4252017623,3264175145] ; CHECK-AVX2-NEXT: vpsrld $2, %xmm0, %xmm1 ; CHECK-AVX2-NEXT: vpslld $30, %xmm0, %xmm0 ; CHECK-AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 @@ -227,7 +227,7 @@ define <4 x i32> @test_urem_even_neg100(<4 x i32> %X) nounwind { ; ; CHECK-AVX512VL-LABEL: test_urem_even_neg100: ; CHECK-AVX512VL: # %bb.0: -; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [4252017623,3264175145,4252017623,3264175145] ; CHECK-AVX512VL-NEXT: vprord $2, %xmm0, %xmm0 ; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 ; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 @@ -277,7 +277,7 @@ define <4 x i32> @test_urem_odd_undef1(<4 x i32> %X) nounwind { ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7] ; CHECK-SSE41-NEXT: psrld $3, %xmm2 -; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 +; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [25,25,25,25] ; CHECK-SSE41-NEXT: psubd %xmm2, %xmm0 ; CHECK-SSE41-NEXT: pxor %xmm1, %xmm1 ; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0 @@ -293,7 +293,7 @@ define <4 x i32> @test_urem_odd_undef1(<4 x i32> %X) nounwind { ; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] ; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7] ; CHECK-AVX1-NEXT: vpsrld $3, %xmm1, %xmm1 -; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [25,25,25,25] ; CHECK-AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0 ; CHECK-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 @@ -372,7 +372,7 @@ define <4 x i32> @test_urem_even_undef1(<4 x i32> %X) nounwind { ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7] ; CHECK-SSE41-NEXT: psrld $5, %xmm2 -; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 +; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [100,100,100,100] ; CHECK-SSE41-NEXT: psubd %xmm2, %xmm0 ; CHECK-SSE41-NEXT: pxor %xmm1, %xmm1 ; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0 @@ -388,7 +388,7 @@ define <4 x i32> @test_urem_even_undef1(<4 x i32> %X) nounwind { ; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] ; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7] ; CHECK-AVX1-NEXT: vpsrld $5, %xmm1, %xmm1 -; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; CHECK-AVX1-NEXT: vpmulld 
{{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [100,100,100,100] ; CHECK-AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0 ; CHECK-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 diff --git a/llvm/test/CodeGen/X86/urem-seteq-vec-tautological.ll b/llvm/test/CodeGen/X86/urem-seteq-vec-tautological.ll index 84856aa..e5b19a5 100644 --- a/llvm/test/CodeGen/X86/urem-seteq-vec-tautological.ll +++ b/llvm/test/CodeGen/X86/urem-seteq-vec-tautological.ll @@ -25,7 +25,7 @@ define <4 x i1> @t0_all_tautological(<4 x i32> %X) nounwind { define <4 x i1> @t1_all_odd_eq(<4 x i32> %X) nounwind { ; CHECK-SSE2-LABEL: t1_all_odd_eq: ; CHECK-SSE2: # %bb.0: -; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [2863311531,2863311531,2863311531,2863311531] ; CHECK-SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; CHECK-SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; CHECK-SSE2-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 @@ -33,7 +33,7 @@ define <4 x i1> @t1_all_odd_eq(<4 x i32> %X) nounwind { ; ; CHECK-SSE41-LABEL: t1_all_odd_eq: ; CHECK-SSE41: # %bb.0: -; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [2863311531,2863311531,2863311531,2863311531] ; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1431655765,4294967295,4294967295,4294967295] ; CHECK-SSE41-NEXT: pminud %xmm0, %xmm1 ; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0 @@ -43,7 +43,7 @@ define <4 x i1> @t1_all_odd_eq(<4 x i32> %X) nounwind { ; ; CHECK-AVX1-LABEL: t1_all_odd_eq: ; CHECK-AVX1: # %bb.0: -; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [2863311531,2863311531,2863311531,2863311531] ; CHECK-AVX1-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 ; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 ; CHECK-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -76,7 +76,7 @@ define <4 x i1> @t1_all_odd_eq(<4 x i32> %X) nounwind { define <4 x i1> @t1_all_odd_ne(<4 x i32> %X) nounwind { ; CHECK-SSE2-LABEL: t1_all_odd_ne: ; CHECK-SSE2: # %bb.0: -; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [2863311531,2863311531,2863311531,2863311531] ; CHECK-SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; CHECK-SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; CHECK-SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 @@ -84,7 +84,7 @@ define <4 x i1> @t1_all_odd_ne(<4 x i32> %X) nounwind { ; ; CHECK-SSE41-LABEL: t1_all_odd_ne: ; CHECK-SSE41: # %bb.0: -; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [2863311531,2863311531,2863311531,2863311531] ; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1431655765,4294967295,4294967295,4294967295] ; CHECK-SSE41-NEXT: pminud %xmm0, %xmm1 ; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0 @@ -95,7 +95,7 @@ define <4 x i1> @t1_all_odd_ne(<4 x i32> %X) nounwind { ; ; CHECK-AVX1-LABEL: t1_all_odd_ne: ; CHECK-AVX1: # %bb.0: -; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [2863311531,2863311531,2863311531,2863311531] ; CHECK-AVX1-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 ; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 ; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 
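
The bracketed annotations this patch adds after each pmulld/pmuludq line print the splatted constant-pool values that previously hid behind the {{\.?LCPI...}} regex. For the urem-seteq tests above, those splats are the multiplicative inverses used by the urem-by-constant rewrite these tests exercise, so they can be cross-checked by hand. A minimal sketch (Python, not part of the diff; divisors taken from the test names, splat values copied from the CHECK lines above):

    M = 1 << 32
    # The urem-by-constant rewrite multiplies by the modular inverse of the
    # divisor, so each annotated splat should satisfy (d * inv) % 2**32 == 1.
    assert (3  * 2863311531) % M == 1   # t32_3, t1_all_odd_eq, t1_all_odd_ne
    assert (5  * 3435973837) % M == 1   # t32_5
    assert (25 * 3264175145) % M == 1   # test_urem_odd_25

The pminud/pcmpeqd pair that follows each multiply compares the multiplied (and, for even divisors, rotated) result against a precomputed threshold, which is why no division instruction appears in any of these sequences.
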
@@ -187,7 +187,7 @@ define <2 x i1> @t3_wide(<2 x i64> %X) nounwind { ; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm3 ; CHECK-SSE2-NEXT: psrlq $32, %xmm3 ; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm3 -; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [2863311530,2863311530] ; CHECK-SSE2-NEXT: paddq %xmm3, %xmm0 ; CHECK-SSE2-NEXT: psllq $32, %xmm0 ; CHECK-SSE2-NEXT: paddq %xmm2, %xmm0 @@ -212,7 +212,7 @@ define <2 x i1> @t3_wide(<2 x i64> %X) nounwind { ; CHECK-SSE41-NEXT: movdqa %xmm0, %xmm3 ; CHECK-SSE41-NEXT: psrlq $32, %xmm3 ; CHECK-SSE41-NEXT: pmuludq %xmm1, %xmm3 -; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [2863311530,2863311530] ; CHECK-SSE41-NEXT: paddq %xmm3, %xmm0 ; CHECK-SSE41-NEXT: psllq $32, %xmm0 ; CHECK-SSE41-NEXT: paddq %xmm2, %xmm0 @@ -236,7 +236,7 @@ define <2 x i1> @t3_wide(<2 x i64> %X) nounwind { ; CHECK-AVX1-NEXT: vpmuludq %xmm1, %xmm0, %xmm2 ; CHECK-AVX1-NEXT: vpsrlq $32, %xmm0, %xmm3 ; CHECK-AVX1-NEXT: vpmuludq %xmm1, %xmm3, %xmm1 -; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [2863311530,2863311530] ; CHECK-AVX1-NEXT: vpaddq %xmm1, %xmm0, %xmm0 ; CHECK-AVX1-NEXT: vpsllq $32, %xmm0, %xmm0 ; CHECK-AVX1-NEXT: vpaddq %xmm0, %xmm2, %xmm0 @@ -255,7 +255,7 @@ define <2 x i1> @t3_wide(<2 x i64> %X) nounwind { ; CHECK-AVX2-NEXT: vpmuludq %xmm1, %xmm0, %xmm2 ; CHECK-AVX2-NEXT: vpsrlq $32, %xmm0, %xmm3 ; CHECK-AVX2-NEXT: vpmuludq %xmm1, %xmm3, %xmm1 -; CHECK-AVX2-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; CHECK-AVX2-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [2863311530,2863311530] ; CHECK-AVX2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 ; CHECK-AVX2-NEXT: vpsllq $32, %xmm0, %xmm0 ; CHECK-AVX2-NEXT: vpaddq %xmm0, %xmm2, %xmm0 diff --git a/llvm/test/CodeGen/X86/var-permute-128.ll b/llvm/test/CodeGen/X86/var-permute-128.ll index 6174011..fce8795 100644 --- a/llvm/test/CodeGen/X86/var-permute-128.ll +++ b/llvm/test/CodeGen/X86/var-permute-128.ll @@ -5,9 +5,9 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop | FileCheck %s --check-prefixes=AVX,AVXNOVLBW,XOP ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVXNOVLBW,AVX1 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVXNOVLBW,AVX2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX,AVXNOVLBW,AVX512 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefixes=AVX,AVXNOVLBW,AVX512 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vbmi | FileCheck %s --check-prefixes=AVX,AVXNOVLBW,AVX512 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX,AVXNOVLBW,AVX512,AVX512F +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefixes=AVX,AVXNOVLBW,AVX512,AVX512BW +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vbmi | FileCheck %s --check-prefixes=AVX,AVXNOVLBW,AVX512,AVX512BW ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl | FileCheck %s --check-prefixes=AVX,AVX512VL,AVX512VLBW ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl,+avx512vbmi | FileCheck %s --check-prefixes=AVX,AVX512VL,VLVBMI @@ -241,7 
+241,7 @@ define <4 x i32> @var_shuffle_v4i32(<4 x i32> %v, <4 x i32> %indices) nounwind { ; ; SSE41-LABEL: var_shuffle_v4i32: ; SSE41: # %bb.0: -; SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [67372036,67372036,67372036,67372036] ; SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 ; SSE41-NEXT: pshufb %xmm1, %xmm0 ; SSE41-NEXT: retq @@ -319,7 +319,7 @@ define <4 x i32> @var_shuffle_zero_v4i32(<4 x i32> %v, <4 x i32> %indices) nounw ; SSE41-NEXT: pmaxud %xmm1, %xmm2 ; SSE41-NEXT: pcmpeqd %xmm1, %xmm2 ; SSE41-NEXT: por %xmm2, %xmm1 -; SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [67372036,67372036,67372036,67372036] ; SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 ; SSE41-NEXT: por %xmm2, %xmm1 ; SSE41-NEXT: pshufb %xmm1, %xmm0 @@ -598,6 +598,33 @@ define <8 x i16> @var_shuffle_zero_v8i16(<8 x i16> %v, <8 x i16> %indices) nounw ; AVX2-NEXT: vpshufb %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; +; AVX512F-LABEL: var_shuffle_zero_v8i16: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpmaxuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm2 +; AVX512F-NEXT: vpcmpeqw %xmm2, %xmm1, %xmm2 +; AVX512F-NEXT: vpor %xmm1, %xmm2, %xmm1 +; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [514,514,514,514,514,514,514,514] +; AVX512F-NEXT: vpaddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX512F-NEXT: vpor %xmm2, %xmm1, %xmm1 +; AVX512F-NEXT: vpshufb %xmm1, %xmm0, %xmm0 +; AVX512F-NEXT: retq +; +; AVX512BW-LABEL: var_shuffle_zero_v8i16: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 +; AVX512BW-NEXT: vpbroadcastw {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7] +; AVX512BW-NEXT: vpcmpnleuw %zmm2, %zmm1, %k1 +; AVX512BW-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 +; AVX512BW-NEXT: vmovdqu16 %zmm2, %zmm1 {%k1} +; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [514,514,514,514,514,514,514,514] +; AVX512BW-NEXT: vpaddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX512BW-NEXT: vpshufb %xmm1, %xmm0, %xmm0 +; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX512BW-NEXT: vmovdqu16 %zmm1, %zmm0 {%k1} +; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; AVX512BW-NEXT: vzeroupper +; AVX512BW-NEXT: retq +; ; AVX512VL-LABEL: var_shuffle_zero_v8i16: ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vpcmpnleuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %k1 @@ -921,6 +948,28 @@ define <16 x i8> @var_shuffle_zero_v16i8(<16 x i8> %v, <16 x i8> %indices) nounw ; AVX2-NEXT: vpshufb %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; +; AVX512F-LABEL: var_shuffle_zero_v16i8: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpmaxub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm2 +; AVX512F-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm2 +; AVX512F-NEXT: vpor %xmm1, %xmm2, %xmm1 +; AVX512F-NEXT: vpshufb %xmm1, %xmm0, %xmm0 +; AVX512F-NEXT: retq +; +; AVX512BW-LABEL: var_shuffle_zero_v16i8: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 +; AVX512BW-NEXT: vpbroadcastb {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] +; AVX512BW-NEXT: vpcmpnleub %zmm2, %zmm1, %k1 +; AVX512BW-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 +; AVX512BW-NEXT: vmovdqu8 %zmm2, %zmm1 {%k1} +; AVX512BW-NEXT: vpshufb %xmm1, %xmm0, %xmm0 +; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX512BW-NEXT: vmovdqu8 %zmm1, %zmm0 {%k1} +; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; AVX512BW-NEXT: vzeroupper +; AVX512BW-NEXT: retq +; ; AVX512VL-LABEL: 
var_shuffle_zero_v16i8: ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vpcmpnleub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %k1 @@ -1212,7 +1261,7 @@ define <4 x float> @var_shuffle_v4f32(<4 x float> %v, <4 x i32> %indices) nounwi ; ; SSE41-LABEL: var_shuffle_v4f32: ; SSE41: # %bb.0: -; SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [67372036,67372036,67372036,67372036] ; SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 ; SSE41-NEXT: pshufb %xmm1, %xmm0 ; SSE41-NEXT: retq @@ -1290,7 +1339,7 @@ define <4 x float> @var_shuffle_zero_v4f32(<4 x float> %v, <4 x i32> %indices) n ; SSE41-NEXT: pmaxud %xmm1, %xmm2 ; SSE41-NEXT: pcmpeqd %xmm1, %xmm2 ; SSE41-NEXT: por %xmm2, %xmm1 -; SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [67372036,67372036,67372036,67372036] ; SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 ; SSE41-NEXT: por %xmm2, %xmm1 ; SSE41-NEXT: pshufb %xmm1, %xmm0 diff --git a/llvm/test/CodeGen/X86/vec_reassociate.ll b/llvm/test/CodeGen/X86/vec_reassociate.ll index a9473fff..4703ca3 100644 --- a/llvm/test/CodeGen/X86/vec_reassociate.ll +++ b/llvm/test/CodeGen/X86/vec_reassociate.ll @@ -38,13 +38,13 @@ define <4 x i32> @mul_4i32(<4 x i32> %a0, <4 x i32> %a1) { ; X86-LABEL: mul_4i32: ; X86: # %bb.0: ; X86-NEXT: pmulld %xmm1, %xmm0 -; X86-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 +; X86-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [4,6,6,4] ; X86-NEXT: retl ; ; X64-LABEL: mul_4i32: ; X64: # %bb.0: ; X64-NEXT: pmulld %xmm1, %xmm0 -; X64-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; X64-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [4,6,6,4] ; X64-NEXT: retq %1 = mul <4 x i32> %a0, <i32 1, i32 2, i32 3, i32 4> %2 = mul <4 x i32> %a1, <i32 4, i32 3, i32 2, i32 1> @@ -56,13 +56,13 @@ define <4 x i32> @mul_4i32_commute(<4 x i32> %a0, <4 x i32> %a1) { ; X86-LABEL: mul_4i32_commute: ; X86: # %bb.0: ; X86-NEXT: pmulld %xmm1, %xmm0 -; X86-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 +; X86-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [4,6,6,4] ; X86-NEXT: retl ; ; X64-LABEL: mul_4i32_commute: ; X64: # %bb.0: ; X64-NEXT: pmulld %xmm1, %xmm0 -; X64-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; X64-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [4,6,6,4] ; X64-NEXT: retq %1 = mul <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %a0 %2 = mul <4 x i32> <i32 4, i32 3, i32 2, i32 1>, %a1 diff --git a/llvm/test/CodeGen/X86/vector-fshl-128.ll b/llvm/test/CodeGen/X86/vector-fshl-128.ll index 762900e..a0c2760 100644 --- a/llvm/test/CodeGen/X86/vector-fshl-128.ll +++ b/llvm/test/CodeGen/X86/vector-fshl-128.ll @@ -1821,9 +1821,9 @@ define <4 x i32> @constant_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y) nounwind { ; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,3],xmm3[0,3] ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3] -; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [16,32,64,128] ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 +; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [32,u,128,u] ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3] ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] ; SSE2-NEXT: por %xmm1, %xmm0 @@ -1841,7 +1841,7 @@ define <4 x i32> @constant_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y) nounwind { ; 
SSE41-NEXT: psrld $28, %xmm1 ; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm1[0,1,2,3],xmm2[4,5,6,7] ; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm3[2,3],xmm2[4,5],xmm3[6,7] -; SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [16,32,64,128] ; SSE41-NEXT: por %xmm2, %xmm0 ; SSE41-NEXT: retq ; @@ -1854,7 +1854,7 @@ define <4 x i32> @constant_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y) nounwind { ; AVX1-NEXT: vpsrld $28, %xmm1, %xmm1 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5,6,7] ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7] -; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [16,32,64,128] ; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: retq ; @@ -1935,9 +1935,9 @@ define <4 x i32> @constant_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y) nounwind { ; X86-SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] ; X86-SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,3],xmm3[0,3] ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3] -; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 +; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [16,32,64,128] ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm2 +; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm2 # [32,u,128,u] ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3] ; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] ; X86-SSE2-NEXT: por %xmm1, %xmm0 diff --git a/llvm/test/CodeGen/X86/vector-fshl-256.ll b/llvm/test/CodeGen/X86/vector-fshl-256.ll index 445e572..2fadf5f 100644 --- a/llvm/test/CodeGen/X86/vector-fshl-256.ll +++ b/llvm/test/CodeGen/X86/vector-fshl-256.ll @@ -1647,7 +1647,7 @@ define <8 x i32> @constant_funnnel_v8i32(<8 x i32> %x, <8 x i32> %y) nounwind { ; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm4[4,5,6,7] ; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm3[2,3],xmm2[4,5],xmm3[6,7] ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 -; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 +; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 # [256,512,1024,2048] ; AVX1-NEXT: vpor %xmm2, %xmm3, %xmm2 ; AVX1-NEXT: vpsrld $25, %xmm1, %xmm3 ; AVX1-NEXT: vpsrld $27, %xmm1, %xmm4 @@ -1656,7 +1656,7 @@ define <8 x i32> @constant_funnnel_v8i32(<8 x i32> %x, <8 x i32> %y) nounwind { ; AVX1-NEXT: vpsrld $28, %xmm1, %xmm1 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm4[4,5,6,7] ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7] -; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [16,32,64,128] ; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 ; AVX1-NEXT: retq diff --git a/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll b/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll index d0690bd..ec2efcd 100644 --- a/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll +++ b/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll @@ -1302,9 +1302,9 @@ define <4 x i32> @constant_funnnel_v4i32(<4 x i32> %x) nounwind { ; SSE2-LABEL: constant_funnnel_v4i32: ; SSE2: # %bb.0: ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] -; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [16,32,64,128] ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,3,2,3] -; 
SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [32,u,128,u] ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,3,2,3] ; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] @@ -1316,8 +1316,8 @@ define <4 x i32> @constant_funnnel_v4i32(<4 x i32> %x) nounwind { ; SSE41-LABEL: constant_funnnel_v4i32: ; SSE41: # %bb.0: ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] -; SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 -; SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [32,u,128,u] +; SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [16,32,64,128] ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3] ; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7] ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2] @@ -1328,8 +1328,8 @@ define <4 x i32> @constant_funnnel_v4i32(<4 x i32> %x) nounwind { ; AVX1-LABEL: constant_funnnel_v4i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] -; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [32,u,128,u] +; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [16,32,64,128] ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,3,3] ; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7] ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2] @@ -1394,9 +1394,9 @@ define <4 x i32> @constant_funnnel_v4i32(<4 x i32> %x) nounwind { ; X86-SSE2-LABEL: constant_funnnel_v4i32: ; X86-SSE2: # %bb.0: ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] -; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 +; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [16,32,64,128] ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,3,2,3] -; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 +; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 # [32,u,128,u] ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,3,2,3] ; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] diff --git a/llvm/test/CodeGen/X86/vector-fshl-rot-256.ll b/llvm/test/CodeGen/X86/vector-fshl-rot-256.ll index 421fa98..5f7e407 100644 --- a/llvm/test/CodeGen/X86/vector-fshl-rot-256.ll +++ b/llvm/test/CodeGen/X86/vector-fshl-rot-256.ll @@ -1082,13 +1082,13 @@ define <8 x i32> @constant_funnnel_v8i32(<8 x i32> %x) nounwind { ; AVX1-LABEL: constant_funnnel_v8i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] -; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [32,u,128,u] ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 ; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm2[1,1,3,3] -; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 +; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 # [512,u,2048,u] ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1 -; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 +; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [16,32,64,128] +; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 # [256,512,1024,2048] ; AVX1-NEXT: vinsertf128 
$1, %xmm2, %ymm0, %ymm0 ; AVX1-NEXT: vmovshdup {{.*#+}} ymm2 = ymm0[1,1,3,3,5,5,7,7] ; AVX1-NEXT: vblendps {{.*#+}} ymm2 = ymm2[0],ymm1[1],ymm2[2],ymm1[3],ymm2[4],ymm1[5],ymm2[6],ymm1[7] diff --git a/llvm/test/CodeGen/X86/vector-fshl-rot-sub128.ll b/llvm/test/CodeGen/X86/vector-fshl-rot-sub128.ll index b378dce..304daab 100644 --- a/llvm/test/CodeGen/X86/vector-fshl-rot-sub128.ll +++ b/llvm/test/CodeGen/X86/vector-fshl-rot-sub128.ll @@ -319,9 +319,9 @@ define <2 x i32> @constant_funnnel_v2i32(<2 x i32> %x) nounwind { ; SSE2-LABEL: constant_funnnel_v2i32: ; SSE2: # %bb.0: ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] -; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [16,32,1,1] ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,3,2,3] -; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [32,u,1,u] ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,3,2,3] ; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] @@ -333,8 +333,8 @@ define <2 x i32> @constant_funnnel_v2i32(<2 x i32> %x) nounwind { ; SSE41-LABEL: constant_funnnel_v2i32: ; SSE41: # %bb.0: ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] -; SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 -; SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [32,u,1,u] +; SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [16,32,1,1] ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3] ; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7] ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2] @@ -345,8 +345,8 @@ define <2 x i32> @constant_funnnel_v2i32(<2 x i32> %x) nounwind { ; AVX1-LABEL: constant_funnnel_v2i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] -; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [32,u,1,u] +; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [16,32,1,1] ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,3,3] ; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7] ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2] @@ -411,9 +411,9 @@ define <2 x i32> @constant_funnnel_v2i32(<2 x i32> %x) nounwind { ; X86-SSE2-LABEL: constant_funnnel_v2i32: ; X86-SSE2: # %bb.0: ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] -; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 +; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [16,32,1,1] ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,3,2,3] -; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 +; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 # [32,u,1,u] ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,3,2,3] ; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] diff --git a/llvm/test/CodeGen/X86/vector-fshl-sub128.ll b/llvm/test/CodeGen/X86/vector-fshl-sub128.ll index 06ff7e7..ae5dd18 100644 --- a/llvm/test/CodeGen/X86/vector-fshl-sub128.ll +++ b/llvm/test/CodeGen/X86/vector-fshl-sub128.ll @@ -500,9 +500,9 @@ define <2 x i32> @constant_funnnel_v2i32(<2 x i32> %x, <2 x i32> %y) nounwind { ; SSE2-NEXT: psrld $27, %xmm2 ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = 
xmm1[0],xmm2[0],xmm1[1],xmm2[1] ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3] -; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [16,32,1,1] ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 +; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [32,u,1,u] ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3] ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] ; SSE2-NEXT: por %xmm1, %xmm0 @@ -514,7 +514,7 @@ define <2 x i32> @constant_funnnel_v2i32(<2 x i32> %x, <2 x i32> %y) nounwind { ; SSE41-NEXT: psrld $27, %xmm2 ; SSE41-NEXT: psrld $28, %xmm1 ; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm1[0,1],xmm2[2,3],xmm1[4,5,6,7] -; SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [16,32,1,1] ; SSE41-NEXT: por %xmm2, %xmm0 ; SSE41-NEXT: retq ; @@ -523,7 +523,7 @@ define <2 x i32> @constant_funnnel_v2i32(<2 x i32> %x, <2 x i32> %y) nounwind { ; AVX1-NEXT: vpsrld $27, %xmm1, %xmm2 ; AVX1-NEXT: vpsrld $28, %xmm1, %xmm1 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5,6,7] -; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [16,32,1,1] ; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: retq ; @@ -598,9 +598,9 @@ define <2 x i32> @constant_funnnel_v2i32(<2 x i32> %x, <2 x i32> %y) nounwind { ; X86-SSE2-NEXT: psrld $27, %xmm2 ; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3] -; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 +; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [16,32,1,1] ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm2 +; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm2 # [32,u,1,u] ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3] ; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] ; X86-SSE2-NEXT: por %xmm1, %xmm0 diff --git a/llvm/test/CodeGen/X86/vector-fshr-128.ll b/llvm/test/CodeGen/X86/vector-fshr-128.ll index 9b52857..33a6a76 100644 --- a/llvm/test/CodeGen/X86/vector-fshr-128.ll +++ b/llvm/test/CodeGen/X86/vector-fshr-128.ll @@ -1741,9 +1741,9 @@ define <4 x i32> @constant_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y) nounwind { ; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,3],xmm3[0,3] ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3] -; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [268435456,134217728,67108864,33554432] ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 +; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [134217728,u,33554432,u] ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3] ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] ; SSE2-NEXT: por %xmm1, %xmm0 @@ -1761,7 +1761,7 @@ define <4 x i32> @constant_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y) nounwind { ; SSE41-NEXT: psrld $4, %xmm1 ; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm1[0,1,2,3],xmm2[4,5,6,7] ; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm3[2,3],xmm2[4,5],xmm3[6,7] -; SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 
# [268435456,134217728,67108864,33554432] ; SSE41-NEXT: por %xmm2, %xmm0 ; SSE41-NEXT: retq ; @@ -1774,7 +1774,7 @@ define <4 x i32> @constant_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y) nounwind { ; AVX1-NEXT: vpsrld $4, %xmm1, %xmm1 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5,6,7] ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7] -; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [268435456,134217728,67108864,33554432] ; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: retq ; @@ -1856,9 +1856,9 @@ define <4 x i32> @constant_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y) nounwind { ; X86-SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] ; X86-SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,3],xmm3[0,3] ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3] -; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 +; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [268435456,134217728,67108864,33554432] ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm2 +; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm2 # [134217728,u,33554432,u] ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3] ; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] ; X86-SSE2-NEXT: por %xmm1, %xmm0 @@ -1872,7 +1872,7 @@ define <8 x i16> @constant_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y) nounwind { ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [0,65535,65535,65535,65535,65535,65535,65535] ; SSE2-NEXT: pandn %xmm1, %xmm2 -; SSE2-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; SSE2-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [0,0,0,128,0,64,0,32,0,16,0,8,0,4,0,2] ; SSE2-NEXT: por %xmm1, %xmm2 ; SSE2-NEXT: paddw %xmm0, %xmm0 ; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [32768,16384,8192,4096,2048,1024,512,256] @@ -1964,7 +1964,7 @@ define <8 x i16> @constant_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y) nounwind { ; X86-SSE2: # %bb.0: ; X86-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [0,65535,65535,65535,65535,65535,65535,65535] ; X86-SSE2-NEXT: pandn %xmm1, %xmm2 -; X86-SSE2-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 +; X86-SSE2-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 # [0,0,0,128,0,64,0,32,0,16,0,8,0,4,0,2] ; X86-SSE2-NEXT: por %xmm1, %xmm2 ; X86-SSE2-NEXT: paddw %xmm0, %xmm0 ; X86-SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [32768,16384,8192,4096,2048,1024,512,256] diff --git a/llvm/test/CodeGen/X86/vector-fshr-256.ll b/llvm/test/CodeGen/X86/vector-fshr-256.ll index a387562..217431be 100644 --- a/llvm/test/CodeGen/X86/vector-fshr-256.ll +++ b/llvm/test/CodeGen/X86/vector-fshr-256.ll @@ -1403,7 +1403,7 @@ define <8 x i32> @constant_funnnel_v8i32(<8 x i32> %x, <8 x i32> %y) nounwind { ; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm4[4,5,6,7] ; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm3[2,3],xmm2[4,5],xmm3[6,7] ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 -; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 +; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 # [16777216,8388608,4194304,2097152] ; AVX1-NEXT: vpor %xmm2, %xmm3, %xmm2 ; AVX1-NEXT: vpsrld $7, %xmm1, %xmm3 ; AVX1-NEXT: vpsrld $5, %xmm1, %xmm4 @@ -1412,7 +1412,7 @@ define <8 x i32> @constant_funnnel_v8i32(<8 x i32> %x, <8 x i32> %y) nounwind { ; AVX1-NEXT: vpsrld $4, %xmm1, %xmm1 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm4[4,5,6,7] ; AVX1-NEXT: 
vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7] -; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [268435456,134217728,67108864,33554432] ; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 ; AVX1-NEXT: retq diff --git a/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll b/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll index 4969cb5..5d01dfd 100644 --- a/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll +++ b/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll @@ -1380,9 +1380,9 @@ define <4 x i32> @constant_funnnel_v4i32(<4 x i32> %x) nounwind { ; SSE2-LABEL: constant_funnnel_v4i32: ; SSE2: # %bb.0: ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] -; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [268435456,134217728,67108864,33554432] ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,3,2,3] -; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [134217728,u,33554432,u] ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,3,2,3] ; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] @@ -1394,8 +1394,8 @@ define <4 x i32> @constant_funnnel_v4i32(<4 x i32> %x) nounwind { ; SSE41-LABEL: constant_funnnel_v4i32: ; SSE41: # %bb.0: ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] -; SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 -; SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [134217728,u,33554432,u] +; SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [268435456,134217728,67108864,33554432] ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3] ; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7] ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2] @@ -1406,8 +1406,8 @@ define <4 x i32> @constant_funnnel_v4i32(<4 x i32> %x) nounwind { ; AVX1-LABEL: constant_funnnel_v4i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] -; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [134217728,u,33554432,u] +; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [268435456,134217728,67108864,33554432] ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,3,3] ; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7] ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2] @@ -1472,9 +1472,9 @@ define <4 x i32> @constant_funnnel_v4i32(<4 x i32> %x) nounwind { ; X86-SSE2-LABEL: constant_funnnel_v4i32: ; X86-SSE2: # %bb.0: ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] -; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 +; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [268435456,134217728,67108864,33554432] ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,3,2,3] -; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 +; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 # [134217728,u,33554432,u] ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,3,2,3] ; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] diff --git a/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll 
b/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll index e2a3e26..4dc931d 100644 --- a/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll +++ b/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll @@ -1134,13 +1134,13 @@ define <8 x i32> @constant_funnnel_v8i32(<8 x i32> %x) nounwind { ; AVX1-LABEL: constant_funnnel_v8i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] -; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [134217728,u,33554432,u] ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 ; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm2[1,1,3,3] -; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 +; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 # [8388608,u,2097152,u] ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1 -; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 +; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [268435456,134217728,67108864,33554432] +; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 # [16777216,8388608,4194304,2097152] ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 ; AVX1-NEXT: vmovshdup {{.*#+}} ymm2 = ymm0[1,1,3,3,5,5,7,7] ; AVX1-NEXT: vblendps {{.*#+}} ymm2 = ymm2[0],ymm1[1],ymm2[2],ymm1[3],ymm2[4],ymm1[5],ymm2[6],ymm1[7] diff --git a/llvm/test/CodeGen/X86/vector-fshr-rot-sub128.ll b/llvm/test/CodeGen/X86/vector-fshr-rot-sub128.ll index ef5ffe4..4b42b18 100644 --- a/llvm/test/CodeGen/X86/vector-fshr-rot-sub128.ll +++ b/llvm/test/CodeGen/X86/vector-fshr-rot-sub128.ll @@ -341,9 +341,9 @@ define <2 x i32> @constant_funnnel_v2i32(<2 x i32> %x) nounwind { ; SSE2-LABEL: constant_funnnel_v2i32: ; SSE2: # %bb.0: ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] -; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [268435456,134217728,1,1] ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,3,2,3] -; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [134217728,u,1,u] ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,3,2,3] ; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] @@ -355,8 +355,8 @@ define <2 x i32> @constant_funnnel_v2i32(<2 x i32> %x) nounwind { ; SSE41-LABEL: constant_funnnel_v2i32: ; SSE41: # %bb.0: ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] -; SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 -; SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [134217728,u,1,u] +; SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [268435456,134217728,1,1] ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3] ; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7] ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2] @@ -367,8 +367,8 @@ define <2 x i32> @constant_funnnel_v2i32(<2 x i32> %x) nounwind { ; AVX1-LABEL: constant_funnnel_v2i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] -; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [134217728,u,1,u] +; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [268435456,134217728,1,1] ; AVX1-NEXT: vpshufd 
{{.*#+}} xmm2 = xmm0[1,1,3,3] ; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7] ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2] @@ -433,9 +433,9 @@ define <2 x i32> @constant_funnnel_v2i32(<2 x i32> %x) nounwind { ; X86-SSE2-LABEL: constant_funnnel_v2i32: ; X86-SSE2: # %bb.0: ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] -; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 +; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [268435456,134217728,1,1] ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,3,2,3] -; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 +; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 # [134217728,u,1,u] ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,3,2,3] ; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] diff --git a/llvm/test/CodeGen/X86/vector-idiv-sdiv-128.ll b/llvm/test/CodeGen/X86/vector-idiv-sdiv-128.ll index 816d5ca..e68d1d7 100644 --- a/llvm/test/CodeGen/X86/vector-idiv-sdiv-128.ll +++ b/llvm/test/CodeGen/X86/vector-idiv-sdiv-128.ll @@ -171,7 +171,7 @@ define <16 x i8> @test_div7_16i8(<16 x i8> %a) nounwind { ; SSE-NEXT: pxor %xmm1, %xmm1 ; SSE-NEXT: pxor %xmm2, %xmm2 ; SSE-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm0[8],xmm2[9],xmm0[9],xmm2[10],xmm0[10],xmm2[11],xmm0[11],xmm2[12],xmm0[12],xmm2[13],xmm0[13],xmm2[14],xmm0[14],xmm2[15],xmm0[15] -; SSE-NEXT: movdqa {{.*#+}} xmm3 = [37632,37632,37632,37632,37632,37632,37632,37632] +; SSE-NEXT: movdqa {{.*#+}} xmm3 = [0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147] ; SSE-NEXT: pmulhw %xmm3, %xmm2 ; SSE-NEXT: psrlw $8, %xmm2 ; SSE-NEXT: pxor %xmm4, %xmm4 @@ -193,7 +193,7 @@ define <16 x i8> @test_div7_16i8(<16 x i8> %a) nounwind { ; AVX1: # %bb.0: ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15] -; AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [37632,37632,37632,37632,37632,37632,37632,37632] +; AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147] ; AVX1-NEXT: vpmulhw %xmm3, %xmm2, %xmm2 ; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2 ; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm4 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] @@ -260,11 +260,11 @@ define <16 x i8> @test_divconstant_16i8(<16 x i8> %a) nounwind { ; SSE-NEXT: pxor %xmm1, %xmm1 ; SSE-NEXT: pxor %xmm2, %xmm2 ; SSE-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm0[8],xmm2[9],xmm0[9],xmm2[10],xmm0[10],xmm2[11],xmm0[11],xmm2[12],xmm0[12],xmm2[13],xmm0[13],xmm2[14],xmm0[14],xmm2[15],xmm0[15] -; SSE-NEXT: pmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [37632,20224,11008,47872,26368,14592,14592,37632] +; SSE-NEXT: pmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [0,147,0,79,0,43,0,187,0,103,0,57,0,57,0,147] ; SSE-NEXT: psrlw $8, %xmm2 ; SSE-NEXT: pxor %xmm3, %xmm3 ; SSE-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] -; SSE-NEXT: pmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 # [37632,33024,14592,26368,47872,11008,20224,37632] +; SSE-NEXT: pmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 # [0,147,0,129,0,57,0,103,0,187,0,43,0,79,0,147] ; SSE-NEXT: psrlw $8, %xmm3 ; SSE-NEXT: packuswb %xmm2, %xmm3 ; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 @@ -287,10 
+287,10 @@ define <16 x i8> @test_divconstant_16i8(<16 x i8> %a) nounwind { ; AVX1: # %bb.0: ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15] -; AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 # [37632,20224,11008,47872,26368,14592,14592,37632] +; AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 # [0,147,0,79,0,43,0,187,0,103,0,57,0,57,0,147] ; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2 ; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm3 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] -; AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 # [37632,33024,14592,26368,47872,11008,20224,37632] +; AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 # [0,147,0,129,0,57,0,103,0,187,0,43,0,79,0,147] ; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3 ; AVX1-NEXT: vpackuswb %xmm2, %xmm3, %xmm2 ; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 @@ -561,7 +561,7 @@ define <16 x i8> @test_rem7_16i8(<16 x i8> %a) nounwind { ; SSE-NEXT: pxor %xmm1, %xmm1 ; SSE-NEXT: pxor %xmm2, %xmm2 ; SSE-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm0[8],xmm2[9],xmm0[9],xmm2[10],xmm0[10],xmm2[11],xmm0[11],xmm2[12],xmm0[12],xmm2[13],xmm0[13],xmm2[14],xmm0[14],xmm2[15],xmm0[15] -; SSE-NEXT: movdqa {{.*#+}} xmm3 = [37632,37632,37632,37632,37632,37632,37632,37632] +; SSE-NEXT: movdqa {{.*#+}} xmm3 = [0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147] ; SSE-NEXT: pmulhw %xmm3, %xmm2 ; SSE-NEXT: psrlw $8, %xmm2 ; SSE-NEXT: pxor %xmm4, %xmm4 @@ -588,7 +588,7 @@ define <16 x i8> @test_rem7_16i8(<16 x i8> %a) nounwind { ; AVX1: # %bb.0: ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15] -; AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [37632,37632,37632,37632,37632,37632,37632,37632] +; AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147] ; AVX1-NEXT: vpmulhw %xmm3, %xmm2, %xmm2 ; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2 ; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm4 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] @@ -667,11 +667,11 @@ define <16 x i8> @test_remconstant_16i8(<16 x i8> %a) nounwind { ; SSE2-NEXT: pxor %xmm2, %xmm2 ; SSE2-NEXT: pxor %xmm1, %xmm1 ; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15] -; SSE2-NEXT: pmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [37632,20224,11008,47872,26368,14592,14592,37632] +; SSE2-NEXT: pmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [0,147,0,79,0,43,0,187,0,103,0,57,0,57,0,147] ; SSE2-NEXT: psrlw $8, %xmm1 ; SSE2-NEXT: pxor %xmm3, %xmm3 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] -; SSE2-NEXT: pmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 # [37632,33024,14592,26368,47872,11008,20224,37632] +; SSE2-NEXT: pmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 # [0,147,0,129,0,57,0,103,0,187,0,43,0,79,0,147] ; SSE2-NEXT: psrlw $8, %xmm3 ; SSE2-NEXT: packuswb %xmm1, %xmm3 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = 
[255,255,0,0,255,0,0,255,255,0,0,255,0,0,0,255]
@@ -706,11 +706,11 @@ define <16 x i8> @test_remconstant_16i8(<16 x i8> %a) nounwind {
; SSE41-NEXT: pxor %xmm1, %xmm1
; SSE41-NEXT: pxor %xmm2, %xmm2
; SSE41-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm0[8],xmm2[9],xmm0[9],xmm2[10],xmm0[10],xmm2[11],xmm0[11],xmm2[12],xmm0[12],xmm2[13],xmm0[13],xmm2[14],xmm0[14],xmm2[15],xmm0[15]
-; SSE41-NEXT: pmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [37632,20224,11008,47872,26368,14592,14592,37632]
+; SSE41-NEXT: pmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [0,147,0,79,0,43,0,187,0,103,0,57,0,57,0,147]
; SSE41-NEXT: psrlw $8, %xmm2
; SSE41-NEXT: pxor %xmm3, %xmm3
; SSE41-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
-; SSE41-NEXT: pmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 # [37632,33024,14592,26368,47872,11008,20224,37632]
+; SSE41-NEXT: pmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 # [0,147,0,129,0,57,0,103,0,187,0,43,0,79,0,147]
; SSE41-NEXT: psrlw $8, %xmm3
; SSE41-NEXT: packuswb %xmm2, %xmm3
; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [255,255,0,0,255,0,0,255,255,0,0,255,0,0,0,255]
@@ -741,10 +741,10 @@ define <16 x i8> @test_remconstant_16i8(<16 x i8> %a) nounwind {
; AVX1: # %bb.0:
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
-; AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 # [37632,20224,11008,47872,26368,14592,14592,37632]
+; AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 # [0,147,0,79,0,43,0,187,0,103,0,57,0,57,0,147]
; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2
; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm3 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 # [37632,33024,14592,26368,47872,11008,20224,37632]
+; AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 # [0,147,0,129,0,57,0,103,0,187,0,43,0,79,0,147]
; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3
; AVX1-NEXT: vpackuswb %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm3
@@ -1116,11 +1116,11 @@ define <16 x i8> @PR143238(<16 x i8> %a0) {
; SSE-NEXT: pxor %xmm1, %xmm1
; SSE-NEXT: pxor %xmm2, %xmm2
; SSE-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm0[8],xmm2[9],xmm0[9],xmm2[10],xmm0[10],xmm2[11],xmm0[11],xmm2[12],xmm0[12],xmm2[13],xmm0[13],xmm2[14],xmm0[14],xmm2[15],xmm0[15]
-; SSE-NEXT: pmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [26368,47872,11008,20224,37632,35072,33024,30976]
+; SSE-NEXT: pmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [0,103,0,187,0,43,0,79,0,147,0,137,0,129,0,121]
; SSE-NEXT: psrlw $8, %xmm2
; SSE-NEXT: pxor %xmm3, %xmm3
; SSE-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
-; SSE-NEXT: pmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 # [33024,22016,33024,26368,11008,37632,33024,14592]
+; SSE-NEXT: pmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 # [0,129,0,86,0,129,0,103,0,43,0,147,0,129,0,57]
; SSE-NEXT: psrlw $8, %xmm3
; SSE-NEXT: packuswb %xmm2, %xmm3
; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
@@ -1144,10 +1144,10 @@ define <16 x i8> @PR143238(<16 x i8> %a0) {
; AVX1: # %bb.0:
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
-; AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 # [26368,47872,11008,20224,37632,35072,33024,30976]
+; AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 # [0,103,0,187,0,43,0,79,0,147,0,137,0,129,0,121]
; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2
; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm3 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 # [33024,22016,33024,26368,11008,37632,33024,14592]
+; AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 # [0,129,0,86,0,129,0,103,0,43,0,147,0,129,0,57]
; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3
; AVX1-NEXT: vpackuswb %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
diff --git a/llvm/test/CodeGen/X86/vector-idiv-sdiv-256.ll b/llvm/test/CodeGen/X86/vector-idiv-sdiv-256.ll
index 63c69e5..7355f36 100644
--- a/llvm/test/CodeGen/X86/vector-idiv-sdiv-256.ll
+++ b/llvm/test/CodeGen/X86/vector-idiv-sdiv-256.ll
@@ -161,7 +161,7 @@ define <32 x i8> @test_div7_32i8(<32 x i8> %a) nounwind {
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm3 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15]
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm4 = [37632,37632,37632,37632,37632,37632,37632,37632]
+; AVX1-NEXT: vbroadcastss {{.*#+}} xmm4 = [0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147]
; AVX1-NEXT: vpmulhw %xmm4, %xmm3, %xmm3
; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3
; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm5 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
@@ -198,7 +198,7 @@ define <32 x i8> @test_div7_32i8(<32 x i8> %a) nounwind {
; AVX2NOBW: # %bb.0:
; AVX2NOBW-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2NOBW-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31]
-; AVX2NOBW-NEXT: vpbroadcastw {{.*#+}} ymm3 = [37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632]
+; AVX2NOBW-NEXT: vpbroadcastw {{.*#+}} ymm3 = [0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147]
; AVX2NOBW-NEXT: vpmulhw %ymm3, %ymm2, %ymm2
; AVX2NOBW-NEXT: vpsrlw $8, %ymm2, %ymm2
; AVX2NOBW-NEXT: vpunpcklbw {{.*#+}} ymm4 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23]
@@ -245,10 +245,10 @@ define <32 x i8> @test_divconstant_32i8(<32 x i8> %a) nounwind {
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm3 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
-; AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 # [37632,20224,11008,47872,26368,14592,33024,37632]
+; AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 # [0,147,0,79,0,43,0,187,0,103,0,57,0,129,0,147]
; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3
; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm2 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
-; AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 # [47872,12544,26368,6912,14592,30976,33024,35072]
+; AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 # [0,187,0,49,0,103,0,27,0,57,0,121,0,129,0,137]
; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2
; AVX1-NEXT: vpackuswb %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm3
@@ -266,10 +266,10 @@ define <32 x i8> @test_divconstant_32i8(<32 x i8> %a) nounwind {
; AVX1-NEXT: vpcmpgtb %xmm2, %xmm1, %xmm4
; AVX1-NEXT: vpsubb %xmm4, %xmm2, %xmm2
; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm4 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
-; AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4, %xmm4 # [35072,33024,30976,14592,6912,26368,12544,47872]
+; AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4, %xmm4 # [0,137,0,129,0,121,0,57,0,27,0,103,0,49,0,187]
; AVX1-NEXT: vpsrlw $8, %xmm4, %xmm4
; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [37632,33024,14592,26368,47872,11008,20224,37632]
+; AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [0,147,0,129,0,57,0,103,0,187,0,43,0,79,0,147]
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0
; AVX1-NEXT: vpackuswb %xmm4, %xmm0, %xmm0
; AVX1-NEXT: vpaddb %xmm3, %xmm0, %xmm0
@@ -291,10 +291,10 @@ define <32 x i8> @test_divconstant_32i8(<32 x i8> %a) nounwind {
; AVX2NOBW: # %bb.0:
; AVX2NOBW-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2NOBW-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31]
-; AVX2NOBW-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 # [35072,33024,30976,14592,6912,26368,12544,47872,37632,20224,11008,47872,26368,14592,33024,37632]
+; AVX2NOBW-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 # [0,137,0,129,0,121,0,57,0,27,0,103,0,49,0,187,0,147,0,79,0,43,0,187,0,103,0,57,0,129,0,147]
; AVX2NOBW-NEXT: vpsrlw $8, %ymm2, %ymm2
; AVX2NOBW-NEXT: vpunpcklbw {{.*#+}} ymm3 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23]
-; AVX2NOBW-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 # [37632,33024,14592,26368,47872,11008,20224,37632,47872,12544,26368,6912,14592,30976,33024,35072]
+; AVX2NOBW-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 # [0,147,0,129,0,57,0,103,0,187,0,43,0,79,0,147,0,187,0,49,0,103,0,27,0,57,0,121,0,129,0,137]
; AVX2NOBW-NEXT: vpsrlw $8, %ymm3, %ymm3
; AVX2NOBW-NEXT: vpackuswb %ymm2, %ymm3, %ymm2
; AVX2NOBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
@@ -539,7 +539,7 @@ define <32 x i8> @test_rem7_32i8(<32 x i8> %a) nounwind {
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm3 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm4 = [37632,37632,37632,37632,37632,37632,37632,37632]
+; AVX1-NEXT: vbroadcastss {{.*#+}} xmm4 = [0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147]
; AVX1-NEXT: vpmulhw %xmm4, %xmm3, %xmm3
; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3
; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm5 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
@@ -585,7 +585,7 @@ define <32 x i8> @test_rem7_32i8(<32 x i8> %a) nounwind {
; AVX2NOBW: # %bb.0:
; AVX2NOBW-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2NOBW-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31]
-; AVX2NOBW-NEXT: vpbroadcastw {{.*#+}} ymm3 = [37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632]
+; AVX2NOBW-NEXT: vpbroadcastw {{.*#+}} ymm3 = [0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147]
; AVX2NOBW-NEXT: vpmulhw %ymm3, %ymm2, %ymm2
; AVX2NOBW-NEXT: vpsrlw $8, %ymm2, %ymm2
; AVX2NOBW-NEXT: vpunpcklbw {{.*#+}} ymm4 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23]
@@ -640,10 +640,10 @@ define <32 x i8> @test_remconstant_32i8(<32 x i8> %a) nounwind {
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm3 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
-; AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 # [37632,20224,11008,47872,26368,14592,33024,37632]
+; AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 # [0,147,0,79,0,43,0,187,0,103,0,57,0,129,0,147]
; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3
; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm4 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
-; AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4, %xmm4 # [47872,12544,26368,6912,14592,30976,33024,35072]
+; AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4, %xmm4 # [0,187,0,49,0,103,0,27,0,57,0,121,0,129,0,137]
; AVX1-NEXT: vpsrlw $8, %xmm4, %xmm4
; AVX1-NEXT: vpackuswb %xmm3, %xmm4, %xmm3
; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm4
@@ -668,10 +668,10 @@ define <32 x i8> @test_remconstant_32i8(<32 x i8> %a) nounwind {
; AVX1-NEXT: vpor %xmm3, %xmm5, %xmm3
; AVX1-NEXT: vpsubb %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm3 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
-; AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 # [35072,33024,30976,14592,6912,26368,12544,47872]
+; AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 # [0,137,0,129,0,121,0,57,0,27,0,103,0,49,0,187]
; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3
; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm5 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm5, %xmm5 # [37632,33024,14592,26368,47872,11008,20224,37632]
+; AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm5, %xmm5 # [0,147,0,129,0,57,0,103,0,187,0,43,0,79,0,147]
; AVX1-NEXT: vpsrlw $8, %xmm5, %xmm5
; AVX1-NEXT: vpackuswb %xmm3, %xmm5, %xmm3
; AVX1-NEXT: vpaddb %xmm4, %xmm3, %xmm3
@@ -699,10 +699,10 @@ define <32 x i8> @test_remconstant_32i8(<32 x i8> %a) nounwind {
; AVX2NOBW: # %bb.0:
; AVX2NOBW-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2NOBW-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31]
-; AVX2NOBW-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 # [35072,33024,30976,14592,6912,26368,12544,47872,37632,20224,11008,47872,26368,14592,33024,37632]
+; AVX2NOBW-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 # [0,137,0,129,0,121,0,57,0,27,0,103,0,49,0,187,0,147,0,79,0,43,0,187,0,103,0,57,0,129,0,147]
; AVX2NOBW-NEXT: vpsrlw $8, %ymm2, %ymm2
; AVX2NOBW-NEXT: vpunpcklbw {{.*#+}} ymm3 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23]
-; AVX2NOBW-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 # [37632,33024,14592,26368,47872,11008,20224,37632,47872,12544,26368,6912,14592,30976,33024,35072]
+; AVX2NOBW-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 # [0,147,0,129,0,57,0,103,0,187,0,43,0,79,0,147,0,187,0,49,0,103,0,27,0,57,0,121,0,129,0,137]
; AVX2NOBW-NEXT: vpsrlw $8, %ymm3, %ymm3
; AVX2NOBW-NEXT: vpackuswb %ymm2, %ymm3, %ymm2
; AVX2NOBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm3
diff --git a/llvm/test/CodeGen/X86/vector-idiv-sdiv-512.ll b/llvm/test/CodeGen/X86/vector-idiv-sdiv-512.ll
index 6bc4fcb..5445330 100644
--- a/llvm/test/CodeGen/X86/vector-idiv-sdiv-512.ll
+++ b/llvm/test/CodeGen/X86/vector-idiv-sdiv-512.ll
@@ -132,7 +132,7 @@ define <64 x i8> @test_div7_64i8(<64 x i8> %a) nounwind {
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm2[8],ymm1[8],ymm2[9],ymm1[9],ymm2[10],ymm1[10],ymm2[11],ymm1[11],ymm2[12],ymm1[12],ymm2[13],ymm1[13],ymm2[14],ymm1[14],ymm2[15],ymm1[15],ymm2[24],ymm1[24],ymm2[25],ymm1[25],ymm2[26],ymm1[26],ymm2[27],ymm1[27],ymm2[28],ymm1[28],ymm2[29],ymm1[29],ymm2[30],ymm1[30],ymm2[31],ymm1[31]
-; AVX512F-NEXT: vpbroadcastw {{.*#+}} ymm4 = [37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632]
+; AVX512F-NEXT: vpbroadcastw {{.*#+}} ymm4 = [0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147]
; AVX512F-NEXT: vpmulhw %ymm4, %ymm3, %ymm3
; AVX512F-NEXT: vpsrlw $8, %ymm3, %ymm3
; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm5 = ymm2[0],ymm1[0],ymm2[1],ymm1[1],ymm2[2],ymm1[2],ymm2[3],ymm1[3],ymm2[4],ymm1[4],ymm2[5],ymm1[5],ymm2[6],ymm1[6],ymm2[7],ymm1[7],ymm2[16],ymm1[16],ymm2[17],ymm1[17],ymm2[18],ymm1[18],ymm2[19],ymm1[19],ymm2[20],ymm1[20],ymm2[21],ymm1[21],ymm2[22],ymm1[22],ymm2[23],ymm1[23]
@@ -169,7 +169,7 @@ define <64 x i8> @test_div7_64i8(<64 x i8> %a) nounwind {
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512BW-NEXT: vpunpckhbw {{.*#+}} zmm2 = zmm1[8],zmm0[8],zmm1[9],zmm0[9],zmm1[10],zmm0[10],zmm1[11],zmm0[11],zmm1[12],zmm0[12],zmm1[13],zmm0[13],zmm1[14],zmm0[14],zmm1[15],zmm0[15],zmm1[24],zmm0[24],zmm1[25],zmm0[25],zmm1[26],zmm0[26],zmm1[27],zmm0[27],zmm1[28],zmm0[28],zmm1[29],zmm0[29],zmm1[30],zmm0[30],zmm1[31],zmm0[31],zmm1[40],zmm0[40],zmm1[41],zmm0[41],zmm1[42],zmm0[42],zmm1[43],zmm0[43],zmm1[44],zmm0[44],zmm1[45],zmm0[45],zmm1[46],zmm0[46],zmm1[47],zmm0[47],zmm1[56],zmm0[56],zmm1[57],zmm0[57],zmm1[58],zmm0[58],zmm1[59],zmm0[59],zmm1[60],zmm0[60],zmm1[61],zmm0[61],zmm1[62],zmm0[62],zmm1[63],zmm0[63]
-; AVX512BW-NEXT: vpbroadcastw {{.*#+}} zmm3 = [37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632]
+; AVX512BW-NEXT: vpbroadcastw {{.*#+}} zmm3 = [0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147]
; AVX512BW-NEXT: vpmulhw %zmm3, %zmm2, %zmm2
; AVX512BW-NEXT: vpsrlw $8, %zmm2, %zmm2
; AVX512BW-NEXT: vpunpcklbw {{.*#+}} zmm1 = zmm1[0],zmm0[0],zmm1[1],zmm0[1],zmm1[2],zmm0[2],zmm1[3],zmm0[3],zmm1[4],zmm0[4],zmm1[5],zmm0[5],zmm1[6],zmm0[6],zmm1[7],zmm0[7],zmm1[16],zmm0[16],zmm1[17],zmm0[17],zmm1[18],zmm0[18],zmm1[19],zmm0[19],zmm1[20],zmm0[20],zmm1[21],zmm0[21],zmm1[22],zmm0[22],zmm1[23],zmm0[23],zmm1[32],zmm0[32],zmm1[33],zmm0[33],zmm1[34],zmm0[34],zmm1[35],zmm0[35],zmm1[36],zmm0[36],zmm1[37],zmm0[37],zmm1[38],zmm0[38],zmm1[39],zmm0[39],zmm1[48],zmm0[48],zmm1[49],zmm0[49],zmm1[50],zmm0[50],zmm1[51],zmm0[51],zmm1[52],zmm0[52],zmm1[53],zmm0[53],zmm1[54],zmm0[54],zmm1[55],zmm0[55]
@@ -199,10 +199,10 @@ define <64 x i8> @test_divconstant_64i8(<64 x i8> %a) nounwind {
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm2
; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15],ymm1[24],ymm2[24],ymm1[25],ymm2[25],ymm1[26],ymm2[26],ymm1[27],ymm2[27],ymm1[28],ymm2[28],ymm1[29],ymm2[29],ymm1[30],ymm2[30],ymm1[31],ymm2[31]
-; AVX512F-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 # [35072,18176,37632,4864,20224,10496,11008,45824,37632,20224,11008,47872,26368,14592,33024,37632]
+; AVX512F-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 # [0,137,0,71,0,147,0,19,0,79,0,41,0,43,0,179,0,147,0,79,0,43,0,187,0,103,0,57,0,129,0,147]
; AVX512F-NEXT: vpsrlw $8, %ymm3, %ymm3
; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm2 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[16],ymm2[16],ymm1[17],ymm2[17],ymm1[18],ymm2[18],ymm1[19],ymm2[19],ymm1[20],ymm2[20],ymm1[21],ymm2[21],ymm1[22],ymm2[22],ymm1[23],ymm2[23]
-; AVX512F-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 # [6912,28416,14592,15104,30976,32000,33024,34048,47872,12544,26368,6912,14592,30976,33024,35072]
+; AVX512F-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 # [0,27,0,111,0,57,0,59,0,121,0,125,0,129,0,133,0,187,0,49,0,103,0,27,0,57,0,121,0,129,0,137]
; AVX512F-NEXT: vpsrlw $8, %ymm2, %ymm2
; AVX512F-NEXT: vpackuswb %ymm3, %ymm2, %ymm2
; AVX512F-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm3
@@ -220,10 +220,10 @@ define <64 x i8> @test_divconstant_64i8(<64 x i8> %a) nounwind {
; AVX512F-NEXT: vpcmpgtb %ymm2, %ymm1, %ymm4
; AVX512F-NEXT: vpsubb %ymm4, %ymm2, %ymm2
; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm4 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31]
-; AVX512F-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm4, %ymm4 # [35072,33024,30976,14592,6912,26368,12544,47872,34048,33024,32000,30976,15104,14592,28416,6912]
+; AVX512F-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm4, %ymm4 # [0,137,0,129,0,121,0,57,0,27,0,103,0,49,0,187,0,133,0,129,0,125,0,121,0,59,0,57,0,111,0,27]
; AVX512F-NEXT: vpsrlw $8, %ymm4, %ymm4
; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23]
-; AVX512F-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [37632,33024,14592,26368,47872,11008,20224,37632,45824,11008,10496,20224,4864,37632,18176,35072]
+; AVX512F-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [0,147,0,129,0,57,0,103,0,187,0,43,0,79,0,147,0,179,0,43,0,41,0,79,0,19,0,147,0,71,0,137]
; AVX512F-NEXT: vpsrlw $8, %ymm0, %ymm0
; AVX512F-NEXT: vpackuswb %ymm4, %ymm0, %ymm0
; AVX512F-NEXT: vpaddb %ymm3, %ymm0, %ymm0
@@ -245,10 +245,10 @@ define <64 x i8> @test_divconstant_64i8(<64 x i8> %a) nounwind {
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512BW-NEXT: vpunpckhbw {{.*#+}} zmm2 = zmm1[8],zmm0[8],zmm1[9],zmm0[9],zmm1[10],zmm0[10],zmm1[11],zmm0[11],zmm1[12],zmm0[12],zmm1[13],zmm0[13],zmm1[14],zmm0[14],zmm1[15],zmm0[15],zmm1[24],zmm0[24],zmm1[25],zmm0[25],zmm1[26],zmm0[26],zmm1[27],zmm0[27],zmm1[28],zmm0[28],zmm1[29],zmm0[29],zmm1[30],zmm0[30],zmm1[31],zmm0[31],zmm1[40],zmm0[40],zmm1[41],zmm0[41],zmm1[42],zmm0[42],zmm1[43],zmm0[43],zmm1[44],zmm0[44],zmm1[45],zmm0[45],zmm1[46],zmm0[46],zmm1[47],zmm0[47],zmm1[56],zmm0[56],zmm1[57],zmm0[57],zmm1[58],zmm0[58],zmm1[59],zmm0[59],zmm1[60],zmm0[60],zmm1[61],zmm0[61],zmm1[62],zmm0[62],zmm1[63],zmm0[63]
-; AVX512BW-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm2 # [35072,33024,30976,14592,6912,26368,12544,47872,34048,33024,32000,30976,15104,14592,28416,6912,35072,18176,37632,4864,20224,10496,11008,45824,37632,20224,11008,47872,26368,14592,33024,37632]
+; AVX512BW-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm2 # [0,137,0,129,0,121,0,57,0,27,0,103,0,49,0,187,0,133,0,129,0,125,0,121,0,59,0,57,0,111,0,27,0,137,0,71,0,147,0,19,0,79,0,41,0,43,0,179,0,147,0,79,0,43,0,187,0,103,0,57,0,129,0,147]
; AVX512BW-NEXT: vpsrlw $8, %zmm2, %zmm2
; AVX512BW-NEXT: vpunpcklbw {{.*#+}} zmm1 = zmm1[0],zmm0[0],zmm1[1],zmm0[1],zmm1[2],zmm0[2],zmm1[3],zmm0[3],zmm1[4],zmm0[4],zmm1[5],zmm0[5],zmm1[6],zmm0[6],zmm1[7],zmm0[7],zmm1[16],zmm0[16],zmm1[17],zmm0[17],zmm1[18],zmm0[18],zmm1[19],zmm0[19],zmm1[20],zmm0[20],zmm1[21],zmm0[21],zmm1[22],zmm0[22],zmm1[23],zmm0[23],zmm1[32],zmm0[32],zmm1[33],zmm0[33],zmm1[34],zmm0[34],zmm1[35],zmm0[35],zmm1[36],zmm0[36],zmm1[37],zmm0[37],zmm1[38],zmm0[38],zmm1[39],zmm0[39],zmm1[48],zmm0[48],zmm1[49],zmm0[49],zmm1[50],zmm0[50],zmm1[51],zmm0[51],zmm1[52],zmm0[52],zmm1[53],zmm0[53],zmm1[54],zmm0[54],zmm1[55],zmm0[55]
-; AVX512BW-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1 # [37632,33024,14592,26368,47872,11008,20224,37632,45824,11008,10496,20224,4864,37632,18176,35072,6912,28416,14592,15104,30976,32000,33024,34048,47872,12544,26368,6912,14592,30976,33024,35072]
+; AVX512BW-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1 # [0,147,0,129,0,57,0,103,0,187,0,43,0,79,0,147,0,179,0,43,0,41,0,79,0,19,0,147,0,71,0,137,0,27,0,111,0,57,0,59,0,121,0,125,0,129,0,133,0,187,0,49,0,103,0,27,0,57,0,121,0,129,0,137]
; AVX512BW-NEXT: vpsrlw $8, %zmm1, %zmm1
; AVX512BW-NEXT: vpackuswb %zmm2, %zmm1, %zmm1
; AVX512BW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
@@ -444,7 +444,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm2
; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15],ymm1[24],ymm2[24],ymm1[25],ymm2[25],ymm1[26],ymm2[26],ymm1[27],ymm2[27],ymm1[28],ymm2[28],ymm1[29],ymm2[29],ymm1[30],ymm2[30],ymm1[31],ymm2[31]
-; AVX512F-NEXT: vpbroadcastw {{.*#+}} ymm4 = [37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632]
+; AVX512F-NEXT: vpbroadcastw {{.*#+}} ymm4 = [0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147]
; AVX512F-NEXT: vpmulhw %ymm4, %ymm3, %ymm3
; AVX512F-NEXT: vpsrlw $8, %ymm3, %ymm3
; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm5 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[16],ymm2[16],ymm1[17],ymm2[17],ymm1[18],ymm2[18],ymm1[19],ymm2[19],ymm1[20],ymm2[20],ymm1[21],ymm2[21],ymm1[22],ymm2[22],ymm1[23],ymm2[23]
@@ -490,7 +490,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512BW-NEXT: vpunpckhbw {{.*#+}} zmm2 = zmm1[8],zmm0[8],zmm1[9],zmm0[9],zmm1[10],zmm0[10],zmm1[11],zmm0[11],zmm1[12],zmm0[12],zmm1[13],zmm0[13],zmm1[14],zmm0[14],zmm1[15],zmm0[15],zmm1[24],zmm0[24],zmm1[25],zmm0[25],zmm1[26],zmm0[26],zmm1[27],zmm0[27],zmm1[28],zmm0[28],zmm1[29],zmm0[29],zmm1[30],zmm0[30],zmm1[31],zmm0[31],zmm1[40],zmm0[40],zmm1[41],zmm0[41],zmm1[42],zmm0[42],zmm1[43],zmm0[43],zmm1[44],zmm0[44],zmm1[45],zmm0[45],zmm1[46],zmm0[46],zmm1[47],zmm0[47],zmm1[56],zmm0[56],zmm1[57],zmm0[57],zmm1[58],zmm0[58],zmm1[59],zmm0[59],zmm1[60],zmm0[60],zmm1[61],zmm0[61],zmm1[62],zmm0[62],zmm1[63],zmm0[63]
-; AVX512BW-NEXT: vpbroadcastw {{.*#+}} zmm3 = [37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632]
+; AVX512BW-NEXT: vpbroadcastw {{.*#+}} zmm3 = [0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147]
; AVX512BW-NEXT: vpmulhw %zmm3, %zmm2, %zmm2
; AVX512BW-NEXT: vpsrlw $8, %zmm2, %zmm2
; AVX512BW-NEXT: vpunpcklbw {{.*#+}} zmm1 = zmm1[0],zmm0[0],zmm1[1],zmm0[1],zmm1[2],zmm0[2],zmm1[3],zmm0[3],zmm1[4],zmm0[4],zmm1[5],zmm0[5],zmm1[6],zmm0[6],zmm1[7],zmm0[7],zmm1[16],zmm0[16],zmm1[17],zmm0[17],zmm1[18],zmm0[18],zmm1[19],zmm0[19],zmm1[20],zmm0[20],zmm1[21],zmm0[21],zmm1[22],zmm0[22],zmm1[23],zmm0[23],zmm1[32],zmm0[32],zmm1[33],zmm0[33],zmm1[34],zmm0[34],zmm1[35],zmm0[35],zmm1[36],zmm0[36],zmm1[37],zmm0[37],zmm1[38],zmm0[38],zmm1[39],zmm0[39],zmm1[48],zmm0[48],zmm1[49],zmm0[49],zmm1[50],zmm0[50],zmm1[51],zmm0[51],zmm1[52],zmm0[52],zmm1[53],zmm0[53],zmm1[54],zmm0[54],zmm1[55],zmm0[55]
@@ -524,10 +524,10 @@ define <64 x i8> @test_remconstant_64i8(<64 x i8> %a) nounwind {
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm2
; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15],ymm1[24],ymm2[24],ymm1[25],ymm2[25],ymm1[26],ymm2[26],ymm1[27],ymm2[27],ymm1[28],ymm2[28],ymm1[29],ymm2[29],ymm1[30],ymm2[30],ymm1[31],ymm2[31]
-; AVX512F-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 # [35072,18176,37632,4864,20224,10496,11008,45824,37632,20224,11008,47872,26368,14592,33024,37632]
+; AVX512F-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 # [0,137,0,71,0,147,0,19,0,79,0,41,0,43,0,179,0,147,0,79,0,43,0,187,0,103,0,57,0,129,0,147]
; AVX512F-NEXT: vpsrlw $8, %ymm3, %ymm3
; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm4 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[16],ymm2[16],ymm1[17],ymm2[17],ymm1[18],ymm2[18],ymm1[19],ymm2[19],ymm1[20],ymm2[20],ymm1[21],ymm2[21],ymm1[22],ymm2[22],ymm1[23],ymm2[23]
-; AVX512F-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm4, %ymm4 # [6912,28416,14592,15104,30976,32000,33024,34048,47872,12544,26368,6912,14592,30976,33024,35072]
+; AVX512F-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm4, %ymm4 # [0,27,0,111,0,57,0,59,0,121,0,125,0,129,0,133,0,187,0,49,0,103,0,27,0,57,0,121,0,129,0,137]
; AVX512F-NEXT: vpsrlw $8, %ymm4, %ymm4
; AVX512F-NEXT: vpackuswb %ymm3, %ymm4, %ymm3
; AVX512F-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm4
@@ -552,10 +552,10 @@ define <64 x i8> @test_remconstant_64i8(<64 x i8> %a) nounwind {
; AVX512F-NEXT: vpor %ymm3, %ymm5, %ymm3
; AVX512F-NEXT: vpsubb %ymm3, %ymm2, %ymm2
; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31]
-; AVX512F-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 # [35072,33024,30976,14592,6912,26368,12544,47872,34048,33024,32000,30976,15104,14592,28416,6912]
+; AVX512F-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 # [0,137,0,129,0,121,0,57,0,27,0,103,0,49,0,187,0,133,0,129,0,125,0,121,0,59,0,57,0,111,0,27]
; AVX512F-NEXT: vpsrlw $8, %ymm3, %ymm3
; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm5 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23]
-; AVX512F-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm5, %ymm5 # [37632,33024,14592,26368,47872,11008,20224,37632,45824,11008,10496,20224,4864,37632,18176,35072]
+; AVX512F-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm5, %ymm5 # [0,147,0,129,0,57,0,103,0,187,0,43,0,79,0,147,0,179,0,43,0,41,0,79,0,19,0,147,0,71,0,137]
; AVX512F-NEXT: vpsrlw $8, %ymm5, %ymm5
; AVX512F-NEXT: vpackuswb %ymm3, %ymm5, %ymm3
; AVX512F-NEXT: vpaddb %ymm4, %ymm3, %ymm3
@@ -583,10 +583,10 @@ define <64 x i8> @test_remconstant_64i8(<64 x i8> %a) nounwind {
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512BW-NEXT: vpunpckhbw {{.*#+}} zmm2 = zmm1[8],zmm0[8],zmm1[9],zmm0[9],zmm1[10],zmm0[10],zmm1[11],zmm0[11],zmm1[12],zmm0[12],zmm1[13],zmm0[13],zmm1[14],zmm0[14],zmm1[15],zmm0[15],zmm1[24],zmm0[24],zmm1[25],zmm0[25],zmm1[26],zmm0[26],zmm1[27],zmm0[27],zmm1[28],zmm0[28],zmm1[29],zmm0[29],zmm1[30],zmm0[30],zmm1[31],zmm0[31],zmm1[40],zmm0[40],zmm1[41],zmm0[41],zmm1[42],zmm0[42],zmm1[43],zmm0[43],zmm1[44],zmm0[44],zmm1[45],zmm0[45],zmm1[46],zmm0[46],zmm1[47],zmm0[47],zmm1[56],zmm0[56],zmm1[57],zmm0[57],zmm1[58],zmm0[58],zmm1[59],zmm0[59],zmm1[60],zmm0[60],zmm1[61],zmm0[61],zmm1[62],zmm0[62],zmm1[63],zmm0[63]
-; AVX512BW-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm2 # [35072,33024,30976,14592,6912,26368,12544,47872,34048,33024,32000,30976,15104,14592,28416,6912,35072,18176,37632,4864,20224,10496,11008,45824,37632,20224,11008,47872,26368,14592,33024,37632]
+; AVX512BW-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm2 # [0,137,0,129,0,121,0,57,0,27,0,103,0,49,0,187,0,133,0,129,0,125,0,121,0,59,0,57,0,111,0,27,0,137,0,71,0,147,0,19,0,79,0,41,0,43,0,179,0,147,0,79,0,43,0,187,0,103,0,57,0,129,0,147]
; AVX512BW-NEXT: vpsrlw $8, %zmm2, %zmm2
; AVX512BW-NEXT: vpunpcklbw {{.*#+}} zmm1 = zmm1[0],zmm0[0],zmm1[1],zmm0[1],zmm1[2],zmm0[2],zmm1[3],zmm0[3],zmm1[4],zmm0[4],zmm1[5],zmm0[5],zmm1[6],zmm0[6],zmm1[7],zmm0[7],zmm1[16],zmm0[16],zmm1[17],zmm0[17],zmm1[18],zmm0[18],zmm1[19],zmm0[19],zmm1[20],zmm0[20],zmm1[21],zmm0[21],zmm1[22],zmm0[22],zmm1[23],zmm0[23],zmm1[32],zmm0[32],zmm1[33],zmm0[33],zmm1[34],zmm0[34],zmm1[35],zmm0[35],zmm1[36],zmm0[36],zmm1[37],zmm0[37],zmm1[38],zmm0[38],zmm1[39],zmm0[39],zmm1[48],zmm0[48],zmm1[49],zmm0[49],zmm1[50],zmm0[50],zmm1[51],zmm0[51],zmm1[52],zmm0[52],zmm1[53],zmm0[53],zmm1[54],zmm0[54],zmm1[55],zmm0[55]
-; AVX512BW-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1 # [37632,33024,14592,26368,47872,11008,20224,37632,45824,11008,10496,20224,4864,37632,18176,35072,6912,28416,14592,15104,30976,32000,33024,34048,47872,12544,26368,6912,14592,30976,33024,35072]
+; AVX512BW-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1 # [0,147,0,129,0,57,0,103,0,187,0,43,0,79,0,147,0,179,0,43,0,41,0,79,0,19,0,147,0,71,0,137,0,27,0,111,0,57,0,59,0,121,0,125,0,129,0,133,0,187,0,49,0,103,0,27,0,57,0,121,0,129,0,137]
; AVX512BW-NEXT: vpsrlw $8, %zmm1, %zmm1
; AVX512BW-NEXT: vpackuswb %zmm2, %zmm1, %zmm1
; AVX512BW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm2
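[Editor's note] The constant-pool comment churn above is a re-annotation, not a data change: the same pool contents that used to be printed as i16 lanes are now printed as bytes. A little-endian i16 lane 37632 (0x9300) is the byte pair [0,147], which is why every old [37632,...] comment reappears as [0,147,...]. A minimal C sketch of that correspondence (names here are illustrative, not from the test suite):

#include <stdint.h>
#include <stdio.h>

int main(void) {
  uint16_t lane = 37632;                /* one lane as the old i16 comments printed it */
  uint8_t lo = (uint8_t)(lane & 0xff);  /* first byte in memory (little-endian)        */
  uint8_t hi = (uint8_t)(lane >> 8);    /* second byte in memory                       */
  printf("[%u,%u]\n", lo, hi);          /* prints [0,147], matching the new comments   */
  return 0;
}

The same byte view explains the pmullw comments in the unsigned tests that follow: an i16 multiplier 37 becomes the pair [37,0].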
diff --git a/llvm/test/CodeGen/X86/vector-idiv-udiv-128.ll b/llvm/test/CodeGen/X86/vector-idiv-udiv-128.ll
index 33d80f6..6cd5098 100644
--- a/llvm/test/CodeGen/X86/vector-idiv-udiv-128.ll
+++ b/llvm/test/CodeGen/X86/vector-idiv-udiv-128.ll
@@ -169,7 +169,7 @@ define <16 x i8> @test_div7_16i8(<16 x i8> %a) nounwind {
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: movdqa %xmm0, %xmm2
; SSE2-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15]
-; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [37,37,37,37,37,37,37,37]
+; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0]
; SSE2-NEXT: pmullw %xmm3, %xmm2
; SSE2-NEXT: psrlw $8, %xmm2
; SSE2-NEXT: movdqa %xmm0, %xmm4
@@ -209,7 +209,7 @@ define <16 x i8> @test_div7_16i8(<16 x i8> %a) nounwind {
; AVX1: # %bb.0:
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [37,37,37,37,37,37,37,37]
+; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0]
; AVX1-NEXT: vpmullw %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpsrlw $8, %xmm1, %xmm1
; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
@@ -270,22 +270,22 @@ define <16 x i8> @test_divconstant_16i8(<16 x i8> %a) nounwind {
; SSE2-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15]
; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [128,256,256,256,256,256,256,256]
; SSE2-NEXT: psrlw $8, %xmm2
-; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [147,79,171,117,205,57,57,37]
+; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [147,0,79,0,171,0,117,0,205,0,57,0,57,0,37,0]
; SSE2-NEXT: psrlw $8, %xmm2
; SSE2-NEXT: movdqa %xmm0, %xmm3
; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7]
; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 # [256,256,256,256,256,256,256,128]
; SSE2-NEXT: psrlw $8, %xmm3
-; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 # [37,32,57,205,117,171,79,147]
+; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 # [37,0,32,0,57,0,205,0,117,0,171,0,79,0,147,0]
; SSE2-NEXT: psrlw $8, %xmm3
; SSE2-NEXT: packuswb %xmm2, %xmm3
; SSE2-NEXT: psubb %xmm3, %xmm0
; SSE2-NEXT: movdqa %xmm0, %xmm2
; SSE2-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15]
-; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [0,0,0,128,0,0,0,128]
+; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [0,0,0,0,0,0,128,0,0,0,0,0,0,0,128,0]
; SSE2-NEXT: psrlw $8, %xmm2
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
-; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [128,0,0,0,128,0,0,0]
+; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [128,0,0,0,0,0,0,0,128,0,0,0,0,0,0,0]
; SSE2-NEXT: psrlw $8, %xmm0
; SSE2-NEXT: packuswb %xmm2, %xmm0
; SSE2-NEXT: paddb %xmm3, %xmm0
@@ -309,7 +309,7 @@ define <16 x i8> @test_divconstant_16i8(<16 x i8> %a) nounwind {
; SSE41-NEXT: psllw $7, %xmm3
; SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0],xmm2[1,2,3,4,5,6,7]
; SSE41-NEXT: psrlw $8, %xmm3
-; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 # [147,79,171,117,205,57,57,37]
+; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 # [147,0,79,0,171,0,117,0,205,0,57,0,57,0,37,0]
; SSE41-NEXT: psrlw $8, %xmm3
; SSE41-NEXT: pxor %xmm2, %xmm2
; SSE41-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
@@ -317,15 +317,15 @@ define <16 x i8> @test_divconstant_16i8(<16 x i8> %a) nounwind {
; SSE41-NEXT: psllw $7, %xmm4
; SSE41-NEXT: pblendw {{.*#+}} xmm4 = xmm2[0,1,2,3,4,5,6],xmm4[7]
; SSE41-NEXT: psrlw $8, %xmm4
-; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4 # [37,32,57,205,117,171,79,147]
+; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4 # [37,0,32,0,57,0,205,0,117,0,171,0,79,0,147,0]
; SSE41-NEXT: psrlw $8, %xmm4
; SSE41-NEXT: packuswb %xmm3, %xmm4
; SSE41-NEXT: psubb %xmm4, %xmm0
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; SSE41-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
-; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [0,0,0,128,0,0,0,128]
+; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [0,0,0,0,0,0,128,0,0,0,0,0,0,0,128,0]
; SSE41-NEXT: psrlw $8, %xmm0
-; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [128,0,0,0,128,0,0,0]
+; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [128,0,0,0,0,0,0,0,128,0,0,0,0,0,0,0]
; SSE41-NEXT: psrlw $8, %xmm2
; SSE41-NEXT: packuswb %xmm0, %xmm2
; SSE41-NEXT: paddb %xmm4, %xmm2
@@ -346,22 +346,22 @@ define <16 x i8> @test_divconstant_16i8(<16 x i8> %a) nounwind {
; AVX1-NEXT: vpsllw $7, %xmm3, %xmm3
; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0],xmm2[1,2,3,4,5,6,7]
; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2
-; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 # [147,79,171,117,205,57,57,37]
+; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 # [147,0,79,0,171,0,117,0,205,0,57,0,57,0,37,0]
; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2
; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm3 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm4 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX1-NEXT: vpsllw $7, %xmm4, %xmm4
; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0,1,2,3,4,5,6],xmm4[7]
; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3
-; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 # [37,32,57,205,117,171,79,147]
+; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 # [37,0,32,0,57,0,205,0,117,0,171,0,79,0,147,0]
; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3
; AVX1-NEXT: vpackuswb %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpsubb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm3 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
-; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 # [0,0,0,128,0,0,0,128]
+; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 # [0,0,0,0,0,0,128,0,0,0,0,0,0,0,128,0]
; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3
; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [128,0,0,0,128,0,0,0]
+; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [128,0,0,0,0,0,0,0,128,0,0,0,0,0,0,0]
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0
; AVX1-NEXT: vpackuswb %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
@@ -638,7 +638,7 @@ define <16 x i8> @test_rem7_16i8(<16 x i8> %a) nounwind {
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: movdqa %xmm0, %xmm2
; SSE2-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15]
-; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [37,37,37,37,37,37,37,37]
+; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0]
; SSE2-NEXT: pmullw %xmm3, %xmm2
; SSE2-NEXT: psrlw $8, %xmm2
; SSE2-NEXT: movdqa %xmm0, %xmm4
@@ -690,7 +690,7 @@ define <16 x i8> @test_rem7_16i8(<16 x i8> %a) nounwind {
; AVX1: # %bb.0:
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [37,37,37,37,37,37,37,37]
+; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0]
; AVX1-NEXT: vpmullw %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpsrlw $8, %xmm1, %xmm1
; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
@@ -763,23 +763,23 @@ define <16 x i8> @test_remconstant_16i8(<16 x i8> %a) nounwind {
; SSE2-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15]
; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [128,256,256,256,256,256,256,256]
; SSE2-NEXT: psrlw $8, %xmm2
-; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [147,79,171,117,205,57,57,37]
+; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [147,0,79,0,171,0,117,0,205,0,57,0,57,0,37,0]
; SSE2-NEXT: psrlw $8, %xmm2
; SSE2-NEXT: movdqa %xmm0, %xmm3
; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7]
; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 # [256,256,256,256,256,256,256,128]
; SSE2-NEXT: psrlw $8, %xmm3
-; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 # [37,32,57,205,117,171,79,147]
+; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 # [37,0,32,0,57,0,205,0,117,0,171,0,79,0,147,0]
; SSE2-NEXT: psrlw $8, %xmm3
; SSE2-NEXT: packuswb %xmm2, %xmm3
; SSE2-NEXT: movdqa %xmm0, %xmm2
; SSE2-NEXT: psubb %xmm3, %xmm2
; SSE2-NEXT: movdqa %xmm2, %xmm4
; SSE2-NEXT: punpckhbw {{.*#+}} xmm4 = xmm4[8],xmm1[8],xmm4[9],xmm1[9],xmm4[10],xmm1[10],xmm4[11],xmm1[11],xmm4[12],xmm1[12],xmm4[13],xmm1[13],xmm4[14],xmm1[14],xmm4[15],xmm1[15]
-; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4 # [0,0,0,128,0,0,0,128]
+; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4 # [0,0,0,0,0,0,128,0,0,0,0,0,0,0,128,0]
; SSE2-NEXT: psrlw $8, %xmm4
; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
-; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [128,0,0,0,128,0,0,0]
+; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [128,0,0,0,0,0,0,0,128,0,0,0,0,0,0,0]
; SSE2-NEXT: psrlw $8, %xmm2
; SSE2-NEXT: packuswb %xmm4, %xmm2
; SSE2-NEXT: paddb %xmm3, %xmm2
@@ -809,7 +809,7 @@ define <16 x i8> @test_remconstant_16i8(<16 x i8> %a) nounwind {
; SSE41-NEXT: psllw $7, %xmm3
; SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0],xmm2[1,2,3,4,5,6,7]
; SSE41-NEXT: psrlw $8, %xmm3
-; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 # [147,79,171,117,205,57,57,37]
+; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 # [147,0,79,0,171,0,117,0,205,0,57,0,57,0,37,0]
; SSE41-NEXT: psrlw $8, %xmm3
; SSE41-NEXT: pxor %xmm2, %xmm2
; SSE41-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
@@ -817,16 +817,16 @@ define <16 x i8> @test_remconstant_16i8(<16 x i8> %a) nounwind {
; SSE41-NEXT: psllw $7, %xmm4
; SSE41-NEXT: pblendw {{.*#+}} xmm4 = xmm2[0,1,2,3,4,5,6],xmm4[7]
; SSE41-NEXT: psrlw $8, %xmm4
-; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4 # [37,32,57,205,117,171,79,147]
+; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4 # [37,0,32,0,57,0,205,0,117,0,171,0,79,0,147,0]
; SSE41-NEXT: psrlw $8, %xmm4
; SSE41-NEXT: packuswb %xmm3, %xmm4
; SSE41-NEXT: movdqa %xmm0, %xmm2
; SSE41-NEXT: psubb %xmm4, %xmm2
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
; SSE41-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15]
-; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [0,0,0,128,0,0,0,128]
+; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [0,0,0,0,0,0,128,0,0,0,0,0,0,0,128,0]
; SSE41-NEXT: psrlw $8, %xmm2
-; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 # [128,0,0,0,128,0,0,0]
+; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 # [128,0,0,0,0,0,0,0,128,0,0,0,0,0,0,0]
; SSE41-NEXT: psrlw $8, %xmm3
; SSE41-NEXT: packuswb %xmm2, %xmm3
; SSE41-NEXT: paddb %xmm4, %xmm3
@@ -854,22 +854,22 @@ define <16 x i8> @test_remconstant_16i8(<16 x i8> %a) nounwind {
; AVX1-NEXT: vpsllw $7, %xmm3, %xmm3
; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0],xmm2[1,2,3,4,5,6,7]
; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2
-; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 # [147,79,171,117,205,57,57,37]
+; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 # [147,0,79,0,171,0,117,0,205,0,57,0,57,0,37,0]
; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2
; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm3 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm4 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX1-NEXT: vpsllw $7, %xmm4, %xmm4
; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0,1,2,3,4,5,6],xmm4[7]
; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3
-; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 # [37,32,57,205,117,171,79,147]
+; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 # [37,0,32,0,57,0,205,0,117,0,171,0,79,0,147,0]
; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3
; AVX1-NEXT: vpackuswb %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpsubb %xmm2, %xmm0, %xmm3
; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm4 = xmm3[8],xmm1[8],xmm3[9],xmm1[9],xmm3[10],xmm1[10],xmm3[11],xmm1[11],xmm3[12],xmm1[12],xmm3[13],xmm1[13],xmm3[14],xmm1[14],xmm3[15],xmm1[15]
-; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4, %xmm4 # [0,0,0,128,0,0,0,128]
+; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4, %xmm4 # [0,0,0,0,0,0,128,0,0,0,0,0,0,0,128,0]
; AVX1-NEXT: vpsrlw $8, %xmm4, %xmm4
; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero
-; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 # [128,0,0,0,128,0,0,0]
+; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 # [128,0,0,0,0,0,0,0,128,0,0,0,0,0,0,0]
; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3
; AVX1-NEXT: vpackuswb %xmm4, %xmm3, %xmm3
; AVX1-NEXT: vpaddb %xmm2, %xmm3, %xmm2
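[Editor's note] In the unsigned files, the recurring multiplier 37 is the divide-by-7 magic for i8: q = (x * 37) >> 8 is a truncated multiply-high (pmullw followed by psrlw $8 in the checked asm), and a short fixup recovers the exact quotient. The scalar sketch below uses the standard fixup recipe; it models the technique these tests pin down but is not a transcription of any one CHECK sequence, and udiv7 is a hypothetical helper name:

#include <assert.h>
#include <stdint.h>

/* Scalar model of unsigned i8 division by 7 via multiply-high. */
static uint8_t udiv7(uint8_t x) {
  uint8_t q = (uint8_t)(((uint16_t)x * 37) >> 8); /* mulhi with magic 37 */
  uint8_t t = (uint8_t)((x - q) >> 1);            /* fold in the residue */
  return (uint8_t)((t + q) >> 2);                 /* final correction    */
}

int main(void) {
  for (int x = 0; x < 256; x++)
    assert(udiv7((uint8_t)x) == x / 7);           /* exhaustive check */
  return 0;
}

The rem7 variants then recover the remainder as x - 7*q, and the divconstant/remconstant variants replace the broadcast magic with a per-lane table of such multipliers (the bracketed byte lists in the comments).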
diff --git a/llvm/test/CodeGen/X86/vector-idiv-udiv-256.ll b/llvm/test/CodeGen/X86/vector-idiv-udiv-256.ll
index e43108f..98ea87c 100644
--- a/llvm/test/CodeGen/X86/vector-idiv-udiv-256.ll
+++ b/llvm/test/CodeGen/X86/vector-idiv-udiv-256.ll
@@ -166,7 +166,7 @@ define <32 x i8> @test_div7_32i8(<32 x i8> %a) nounwind {
; AVX1: # %bb.0:
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [37,37,37,37,37,37,37,37]
+; AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0]
; AVX1-NEXT: vpmullw %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2
; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm4 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
@@ -200,7 +200,7 @@ define <32 x i8> @test_div7_32i8(<32 x i8> %a) nounwind {
; AVX2NOBW: # %bb.0:
; AVX2NOBW-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2NOBW-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
-; AVX2NOBW-NEXT: vpbroadcastw {{.*#+}} ymm3 = [37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37]
+; AVX2NOBW-NEXT: vpbroadcastw {{.*#+}} ymm3 = [37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0]
; AVX2NOBW-NEXT: vpmullw %ymm3, %ymm2, %ymm2
; AVX2NOBW-NEXT: vpsrlw $8, %ymm2, %ymm2
; AVX2NOBW-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
@@ -246,22 +246,22 @@ define <32 x i8> @test_divconstant_32i8(<32 x i8> %a) nounwind {
; AVX1-NEXT: vpsllw $7, %xmm4, %xmm4
; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm4[0],xmm3[1,2,3,4,5,6,7]
; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3
-; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 # [147,79,171,117,205,57,32,37]
+; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 # [147,0,79,0,171,0,117,0,205,0,57,0,32,0,37,0]
; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3
; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm4 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm5 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
; AVX1-NEXT: vpsllw $7, %xmm5, %xmm5
; AVX1-NEXT: vpblendw {{.*#+}} xmm4 = xmm5[0],xmm4[1,2,3,4,5,6,7]
; AVX1-NEXT: vpsrlw $8, %xmm4, %xmm4
-; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4, %xmm4 # [187,135,205,27,57,241,16,137]
+; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4, %xmm4 # [187,0,135,0,205,0,27,0,57,0,241,0,16,0,137,0]
; AVX1-NEXT: vpsrlw $8, %xmm4, %xmm4
; AVX1-NEXT: vpackuswb %xmm3, %xmm4, %xmm3
; AVX1-NEXT: vpsubb %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm4 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15]
-; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4, %xmm4 # [0,0,0,128,0,0,0,128]
+; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4, %xmm4 # [0,0,0,0,0,0,128,0,0,0,0,0,0,0,128,0]
; AVX1-NEXT: vpsrlw $8, %xmm4, %xmm4
; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
-; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 # [0,128,0,0,0,0,0,0]
+; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 # [0,0,128,0,0,0,0,0,0,0,0,0,0,0,0,0]
; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2
; AVX1-NEXT: vpackuswb %xmm4, %xmm2, %xmm2
; AVX1-NEXT: vpaddb %xmm3, %xmm2, %xmm2
@@ -276,22 +276,22 @@ define <32 x i8> @test_divconstant_32i8(<32 x i8> %a) nounwind {
; AVX1-NEXT: vpsrlw $1, %xmm0, %xmm4
; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0,1,2,3,4,5,6],xmm4[7]
; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3
-; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 # [137,16,241,57,27,205,135,187]
+; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 # [137,0,16,0,241,0,57,0,27,0,205,0,135,0,187,0]
; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3
; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm4 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm5 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX1-NEXT: vpsllw $7, %xmm5, %xmm5
; AVX1-NEXT: vpblendw {{.*#+}} xmm4 = xmm4[0,1,2,3,4,5,6],xmm5[7]
; AVX1-NEXT: vpsrlw $8, %xmm4, %xmm4
-; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4, %xmm4 # [37,32,57,205,117,171,79,147]
+; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4, %xmm4 # [37,0,32,0,57,0,205,0,117,0,171,0,79,0,147,0]
; AVX1-NEXT: vpsrlw $8, %xmm4, %xmm4
; AVX1-NEXT: vpackuswb %xmm3, %xmm4, %xmm3
; AVX1-NEXT: vpsubb %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm4 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
-; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4, %xmm4 # [0,0,0,0,0,0,128,0]
+; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4, %xmm4 # [0,0,0,0,0,0,0,0,0,0,0,0,128,0,0,0]
; AVX1-NEXT: vpsrlw $8, %xmm4, %xmm4
; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [128,0,0,0,128,0,0,0]
+; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [128,0,0,0,0,0,0,0,128,0,0,0,0,0,0,0]
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0
; AVX1-NEXT: vpackuswb %xmm4, %xmm0, %xmm0
; AVX1-NEXT: vpaddb %xmm3, %xmm0, %xmm0
@@ -312,20 +312,20 @@ define <32 x i8> @test_divconstant_32i8(<32 x i8> %a) nounwind {
; AVX2NOBW-NEXT: vmovdqa {{.*#+}} ymm3 = [256,256,256,256,256,256,256,128,128,256,256,256,256,256,256,256]
; AVX2NOBW-NEXT: vpmullw %ymm3, %ymm2, %ymm2
; AVX2NOBW-NEXT: vpsrlw $8, %ymm2, %ymm2
-; AVX2NOBW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 # [137,16,241,57,27,205,135,187,147,79,171,117,205,57,32,37]
+; AVX2NOBW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 # [137,0,16,0,241,0,57,0,27,0,205,0,135,0,187,0,147,0,79,0,171,0,117,0,205,0,57,0,32,0,37,0]
; AVX2NOBW-NEXT: vpsrlw $8, %ymm2, %ymm2
; AVX2NOBW-NEXT: vpunpcklbw {{.*#+}} ymm4 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
; AVX2NOBW-NEXT: vpmullw %ymm3, %ymm4, %ymm3
; AVX2NOBW-NEXT: vpsrlw $8, %ymm3, %ymm3
-; AVX2NOBW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 # [37,32,57,205,117,171,79,147,187,135,205,27,57,241,16,137]
+; AVX2NOBW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 # [37,0,32,0,57,0,205,0,117,0,171,0,79,0,147,0,187,0,135,0,205,0,27,0,57,0,241,0,16,0,137,0]
; AVX2NOBW-NEXT: vpsrlw $8, %ymm3, %ymm3
; AVX2NOBW-NEXT: vpackuswb %ymm2, %ymm3, %ymm2
; AVX2NOBW-NEXT: vpsubb %ymm2, %ymm0, %ymm0
; AVX2NOBW-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
-; AVX2NOBW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 # [0,0,0,0,0,0,128,0,0,0,0,128,0,0,0,128]
+; AVX2NOBW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 # [0,0,0,0,0,0,0,0,0,0,0,0,128,0,0,0,0,0,0,0,0,0,128,0,0,0,0,0,0,0,128,0]
; AVX2NOBW-NEXT: vpsrlw $8, %ymm3, %ymm3
; AVX2NOBW-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
-; AVX2NOBW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [128,0,0,0,128,0,0,0,0,128,0,0,0,0,0,0]
+; AVX2NOBW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [128,0,0,0,0,0,0,0,128,0,0,0,0,0,0,0,0,0,128,0,0,0,0,0,0,0,0,0,0,0,0,0]
; AVX2NOBW-NEXT: vpsrlw $8, %ymm0, %ymm0
; AVX2NOBW-NEXT: vpackuswb %ymm3, %ymm0, %ymm0
; AVX2NOBW-NEXT: vpaddb %ymm2, %ymm0, %ymm0
@@ -578,7 +578,7 @@ define <32 x i8> @test_rem7_32i8(<32 x i8> %a) nounwind {
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm3 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm4 = [37,37,37,37,37,37,37,37]
+; AVX1-NEXT: vbroadcastss {{.*#+}} xmm4 = [37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0]
; AVX1-NEXT: vpmullw %xmm4, %xmm3, %xmm3
; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3
; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm5 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
@@ -622,7 +622,7 @@ define <32 x i8> @test_rem7_32i8(<32 x i8> %a) nounwind {
; AVX2NOBW: # %bb.0:
; AVX2NOBW-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2NOBW-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
-; AVX2NOBW-NEXT: vpbroadcastw {{.*#+}} ymm3 = [37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37]
+; AVX2NOBW-NEXT: vpbroadcastw {{.*#+}} ymm3 = [37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0]
; AVX2NOBW-NEXT: vpmullw %ymm3, %ymm2, %ymm2
; AVX2NOBW-NEXT: vpsrlw $8, %ymm2, %ymm2
; AVX2NOBW-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
@@ -676,22 +676,22 @@ define <32 x i8> @test_remconstant_32i8(<32 x i8> %a) nounwind {
; AVX1-NEXT: vpsllw $7, %xmm4, %xmm4
; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm4[0],xmm3[1,2,3,4,5,6,7]
; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3
-; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 # [147,79,171,117,205,57,32,37]
+; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 # [147,0,79,0,171,0,117,0,205,0,57,0,32,0,37,0]
; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3
; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm4 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm5 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
; AVX1-NEXT: vpsllw $7, %xmm5, %xmm5
; AVX1-NEXT: vpblendw {{.*#+}} xmm4 = xmm5[0],xmm4[1,2,3,4,5,6,7]
; AVX1-NEXT: vpsrlw $8, %xmm4, %xmm4
-; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4, %xmm4 # [187,135,205,27,57,241,16,137]
+; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4, %xmm4 # [187,0,135,0,205,0,27,0,57,0,241,0,16,0,137,0]
; AVX1-NEXT: vpsrlw $8, %xmm4, %xmm4
; AVX1-NEXT: vpackuswb %xmm3, %xmm4, %xmm3
; AVX1-NEXT: vpsubb %xmm3, %xmm2, %xmm4
; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm5 = xmm4[8],xmm1[8],xmm4[9],xmm1[9],xmm4[10],xmm1[10],xmm4[11],xmm1[11],xmm4[12],xmm1[12],xmm4[13],xmm1[13],xmm4[14],xmm1[14],xmm4[15],xmm1[15]
-; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm5, %xmm5 # [0,0,0,128,0,0,0,128]
+; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm5, %xmm5 # [0,0,0,0,0,0,128,0,0,0,0,0,0,0,128,0]
; AVX1-NEXT: vpsrlw $8, %xmm5, %xmm5
; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero,xmm4[4],zero,xmm4[5],zero,xmm4[6],zero,xmm4[7],zero
-; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4, %xmm4 # [0,128,0,0,0,0,0,0]
+; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4, %xmm4 # [0,0,128,0,0,0,0,0,0,0,0,0,0,0,0,0]
; AVX1-NEXT: vpsrlw $8, %xmm4, %xmm4
; AVX1-NEXT: vpackuswb %xmm5, %xmm4, %xmm4
; AVX1-NEXT: vpaddb %xmm3, %xmm4, %xmm3
@@ -713,22 +713,22 @@ define <32 x i8> @test_remconstant_32i8(<32 x i8> %a) nounwind {
; AVX1-NEXT: vpsrlw $1, %xmm0, %xmm5
; AVX1-NEXT: vpblendw {{.*#+}} xmm4 = xmm4[0,1,2,3,4,5,6],xmm5[7]
; AVX1-NEXT: vpsrlw $8, %xmm4, %xmm4
-; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4, %xmm4 # [137,16,241,57,27,205,135,187]
+; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4, %xmm4 # [137,0,16,0,241,0,57,0,27,0,205,0,135,0,187,0]
; AVX1-NEXT: vpsrlw $8, %xmm4, %xmm4
; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm5 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm6 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX1-NEXT: vpsllw $7, %xmm6, %xmm6
; AVX1-NEXT: vpblendw {{.*#+}} xmm5 = xmm5[0,1,2,3,4,5,6],xmm6[7]
; AVX1-NEXT: vpsrlw $8, %xmm5, %xmm5
-; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm5, %xmm5 # [37,32,57,205,117,171,79,147]
+; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm5, %xmm5 # [37,0,32,0,57,0,205,0,117,0,171,0,79,0,147,0]
; AVX1-NEXT: vpsrlw $8, %xmm5, %xmm5
; AVX1-NEXT: vpackuswb %xmm4, %xmm5, %xmm4
; AVX1-NEXT: vpsubb %xmm4, %xmm0, %xmm5
; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm6 = xmm5[8],xmm1[8],xmm5[9],xmm1[9],xmm5[10],xmm1[10],xmm5[11],xmm1[11],xmm5[12],xmm1[12],xmm5[13],xmm1[13],xmm5[14],xmm1[14],xmm5[15],xmm1[15]
-; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm6, %xmm6 # [0,0,0,0,0,0,128,0]
+; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm6, %xmm6 # [0,0,0,0,0,0,0,0,0,0,0,0,128,0,0,0]
; AVX1-NEXT: vpsrlw $8, %xmm6, %xmm6
; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm5 = xmm5[0],zero,xmm5[1],zero,xmm5[2],zero,xmm5[3],zero,xmm5[4],zero,xmm5[5],zero,xmm5[6],zero,xmm5[7],zero
-; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm5, %xmm5 # [128,0,0,0,128,0,0,0]
+; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm5, %xmm5 # [128,0,0,0,0,0,0,0,128,0,0,0,0,0,0,0]
; AVX1-NEXT: vpsrlw $8, %xmm5, %xmm5
; AVX1-NEXT: vpackuswb %xmm6, %xmm5, %xmm5
; AVX1-NEXT: vpaddb %xmm4, %xmm5, %xmm4
@@ -755,20 +755,20 @@ define <32 x i8> @test_remconstant_32i8(<32 x i8> %a) nounwind {
; AVX2NOBW-NEXT: vmovdqa {{.*#+}} ymm3 = [256,256,256,256,256,256,256,128,128,256,256,256,256,256,256,256]
; AVX2NOBW-NEXT: vpmullw %ymm3, %ymm2, %ymm2
; AVX2NOBW-NEXT: vpsrlw $8, %ymm2, %ymm2
-; AVX2NOBW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 # [137,16,241,57,27,205,135,187,147,79,171,117,205,57,32,37]
+; AVX2NOBW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 # [137,0,16,0,241,0,57,0,27,0,205,0,135,0,187,0,147,0,79,0,171,0,117,0,205,0,57,0,32,0,37,0]
; AVX2NOBW-NEXT: vpsrlw $8, %ymm2, %ymm2
; AVX2NOBW-NEXT: vpunpcklbw {{.*#+}} ymm4 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
; AVX2NOBW-NEXT: vpmullw %ymm3, %ymm4, %ymm3
; AVX2NOBW-NEXT: vpsrlw $8, %ymm3, %ymm3
-; AVX2NOBW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 # [37,32,57,205,117,171,79,147,187,135,205,27,57,241,16,137]
+; AVX2NOBW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 # [37,0,32,0,57,0,205,0,117,0,171,0,79,0,147,0,187,0,135,0,205,0,27,0,57,0,241,0,16,0,137,0]
; AVX2NOBW-NEXT: vpsrlw $8, %ymm3, %ymm3
; AVX2NOBW-NEXT: vpackuswb %ymm2, %ymm3, %ymm2
; AVX2NOBW-NEXT: vpsubb %ymm2, %ymm0, %ymm3
; AVX2NOBW-NEXT: vpunpckhbw {{.*#+}} ymm4 = ymm3[8],ymm1[8],ymm3[9],ymm1[9],ymm3[10],ymm1[10],ymm3[11],ymm1[11],ymm3[12],ymm1[12],ymm3[13],ymm1[13],ymm3[14],ymm1[14],ymm3[15],ymm1[15],ymm3[24],ymm1[24],ymm3[25],ymm1[25],ymm3[26],ymm1[26],ymm3[27],ymm1[27],ymm3[28],ymm1[28],ymm3[29],ymm1[29],ymm3[30],ymm1[30],ymm3[31],ymm1[31]
-; AVX2NOBW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm4, %ymm4 # [0,0,0,0,0,0,128,0,0,0,0,128,0,0,0,128]
+; AVX2NOBW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm4, %ymm4 # [0,0,0,0,0,0,0,0,0,0,0,0,128,0,0,0,0,0,0,0,0,0,128,0,0,0,0,0,0,0,128,0]
; AVX2NOBW-NEXT: vpsrlw $8, %ymm4, %ymm4
; AVX2NOBW-NEXT: vpunpcklbw {{.*#+}} ymm3 = ymm3[0],ymm1[0],ymm3[1],ymm1[1],ymm3[2],ymm1[2],ymm3[3],ymm1[3],ymm3[4],ymm1[4],ymm3[5],ymm1[5],ymm3[6],ymm1[6],ymm3[7],ymm1[7],ymm3[16],ymm1[16],ymm3[17],ymm1[17],ymm3[18],ymm1[18],ymm3[19],ymm1[19],ymm3[20],ymm1[20],ymm3[21],ymm1[21],ymm3[22],ymm1[22],ymm3[23],ymm1[23]
-; AVX2NOBW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 # [128,0,0,0,128,0,0,0,0,128,0,0,0,0,0,0]
+; AVX2NOBW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 # [128,0,0,0,0,0,0,0,128,0,0,0,0,0,0,0,0,0,128,0,0,0,0,0,0,0,0,0,0,0,0,0]
; AVX2NOBW-NEXT: vpsrlw $8, %ymm3, %ymm3
; AVX2NOBW-NEXT: vpackuswb %ymm4, %ymm3, %ymm3
; AVX2NOBW-NEXT: vpaddb %ymm2, %ymm3, %ymm2
diff --git a/llvm/test/CodeGen/X86/vector-idiv-udiv-512.ll b/llvm/test/CodeGen/X86/vector-idiv-udiv-512.ll
index bf98bcc..a11fa370 100644
--- a/llvm/test/CodeGen/X86/vector-idiv-udiv-512.ll
+++ b/llvm/test/CodeGen/X86/vector-idiv-udiv-512.ll
@@ -135,7 +135,7 @@ define <64 x i8> @test_div7_64i8(<64 x i8> %a) nounwind {
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
-; AVX512F-NEXT: vpbroadcastw {{.*#+}} ymm3 = [37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37]
+; AVX512F-NEXT: vpbroadcastw {{.*#+}} ymm3 = [37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0]
; AVX512F-NEXT: vpmullw %ymm3, %ymm2, %ymm2
; AVX512F-NEXT: vpsrlw $8, %ymm2, %ymm2
; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm4 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
@@ -169,7 +169,7 @@ define <64 x i8> @test_div7_64i8(<64 x i8> %a) nounwind {
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512BW-NEXT: vpunpckhbw {{.*#+}} zmm2 = zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31],zmm0[40],zmm1[40],zmm0[41],zmm1[41],zmm0[42],zmm1[42],zmm0[43],zmm1[43],zmm0[44],zmm1[44],zmm0[45],zmm1[45],zmm0[46],zmm1[46],zmm0[47],zmm1[47],zmm0[56],zmm1[56],zmm0[57],zmm1[57],zmm0[58],zmm1[58],zmm0[59],zmm1[59],zmm0[60],zmm1[60],zmm0[61],zmm1[61],zmm0[62],zmm1[62],zmm0[63],zmm1[63]
-; AVX512BW-NEXT: vpbroadcastw {{.*#+}} zmm3 = [37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37]
+; AVX512BW-NEXT: vpbroadcastw
{{.*#+}} zmm3 = [37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0] ; AVX512BW-NEXT: vpmullw %zmm3, %zmm2, %zmm2 ; AVX512BW-NEXT: vpsrlw $8, %zmm2, %zmm2 ; AVX512BW-NEXT: vpunpcklbw {{.*#+}} zmm1 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[32],zmm1[32],zmm0[33],zmm1[33],zmm0[34],zmm1[34],zmm0[35],zmm1[35],zmm0[36],zmm1[36],zmm0[37],zmm1[37],zmm0[38],zmm1[38],zmm0[39],zmm1[39],zmm0[48],zmm1[48],zmm0[49],zmm1[49],zmm0[50],zmm1[50],zmm0[51],zmm1[51],zmm0[52],zmm1[52],zmm0[53],zmm1[53],zmm0[54],zmm1[54],zmm0[55],zmm1[55] @@ -199,20 +199,20 @@ define <64 x i8> @test_divconstant_64i8(<64 x i8> %a) nounwind { ; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm2[8],ymm1[8],ymm2[9],ymm1[9],ymm2[10],ymm1[10],ymm2[11],ymm1[11],ymm2[12],ymm1[12],ymm2[13],ymm1[13],ymm2[14],ymm1[14],ymm2[15],ymm1[15],ymm2[24],ymm1[24],ymm2[25],ymm1[25],ymm2[26],ymm1[26],ymm2[27],ymm1[27],ymm2[28],ymm1[28],ymm2[29],ymm1[29],ymm2[30],ymm1[30],ymm2[31],ymm1[31] ; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 # [256,256,64,256,256,256,256,256,128,256,256,256,256,256,256,256] ; AVX512F-NEXT: vpsrlw $8, %ymm3, %ymm3 -; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 # [137,27,37,19,79,41,171,101,147,79,171,117,205,57,32,37] +; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 # [137,0,27,0,37,0,19,0,79,0,41,0,171,0,101,0,147,0,79,0,171,0,117,0,205,0,57,0,32,0,37,0] ; AVX512F-NEXT: vpsrlw $8, %ymm3, %ymm3 ; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm4 = ymm2[0],ymm1[0],ymm2[1],ymm1[1],ymm2[2],ymm1[2],ymm2[3],ymm1[3],ymm2[4],ymm1[4],ymm2[5],ymm1[5],ymm2[6],ymm1[6],ymm2[7],ymm1[7],ymm2[16],ymm1[16],ymm2[17],ymm1[17],ymm2[18],ymm1[18],ymm2[19],ymm1[19],ymm2[20],ymm1[20],ymm2[21],ymm1[21],ymm2[22],ymm1[22],ymm2[23],ymm1[23] ; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm4, %ymm4 # [256,256,256,256,256,256,256,256,128,256,256,256,256,256,256,256] ; AVX512F-NEXT: vpsrlw $8, %ymm4, %ymm4 -; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm4, %ymm4 # [27,111,57,235,241,249,8,9,187,135,205,27,57,241,16,137] +; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm4, %ymm4 # [27,0,111,0,57,0,235,0,241,0,249,0,8,0,9,0,187,0,135,0,205,0,27,0,57,0,241,0,16,0,137,0] ; AVX512F-NEXT: vpsrlw $8, %ymm4, %ymm4 ; AVX512F-NEXT: vpackuswb %ymm3, %ymm4, %ymm3 ; AVX512F-NEXT: vpsubb %ymm3, %ymm2, %ymm2 ; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm4 = ymm2[8],ymm1[8],ymm2[9],ymm1[9],ymm2[10],ymm1[10],ymm2[11],ymm1[11],ymm2[12],ymm1[12],ymm2[13],ymm1[13],ymm2[14],ymm1[14],ymm2[15],ymm1[15],ymm2[24],ymm1[24],ymm2[25],ymm1[25],ymm2[26],ymm1[26],ymm2[27],ymm1[27],ymm2[28],ymm1[28],ymm2[29],ymm1[29],ymm2[30],ymm1[30],ymm2[31],ymm1[31] -; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm4, %ymm4 # [0,128,0,0,0,0,0,128,0,0,0,128,0,0,0,128] +; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm4, %ymm4 # [0,0,128,0,0,0,0,0,0,0,0,0,0,0,128,0,0,0,0,0,0,0,128,0,0,0,0,0,0,0,128,0] ; AVX512F-NEXT: vpsrlw $8, %ymm4, %ymm4 ; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm2 = 
ymm2[0],ymm1[0],ymm2[1],ymm1[1],ymm2[2],ymm1[2],ymm2[3],ymm1[3],ymm2[4],ymm1[4],ymm2[5],ymm1[5],ymm2[6],ymm1[6],ymm2[7],ymm1[7],ymm2[16],ymm1[16],ymm2[17],ymm1[17],ymm2[18],ymm1[18],ymm2[19],ymm1[19],ymm2[20],ymm1[20],ymm2[21],ymm1[21],ymm2[22],ymm1[22],ymm2[23],ymm1[23] -; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 # [0,0,0,0,0,0,0,128,0,128,0,0,0,0,0,0] +; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 # [0,0,0,0,0,0,0,0,0,0,0,0,0,0,128,0,0,0,128,0,0,0,0,0,0,0,0,0,0,0,0,0] ; AVX512F-NEXT: vpsrlw $8, %ymm2, %ymm2 ; AVX512F-NEXT: vpackuswb %ymm4, %ymm2, %ymm2 ; AVX512F-NEXT: vpaddb %ymm3, %ymm2, %ymm2 @@ -226,20 +226,20 @@ define <64 x i8> @test_divconstant_64i8(<64 x i8> %a) nounwind { ; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] ; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 # [256,256,256,256,256,256,256,128,256,256,256,256,256,256,256,256] ; AVX512F-NEXT: vpsrlw $8, %ymm3, %ymm3 -; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 # [137,16,241,57,27,205,135,187,9,8,249,241,235,57,111,27] +; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 # [137,0,16,0,241,0,57,0,27,0,205,0,135,0,187,0,9,0,8,0,249,0,241,0,235,0,57,0,111,0,27,0] ; AVX512F-NEXT: vpsrlw $8, %ymm3, %ymm3 ; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm4 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] ; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm4, %ymm4 # [256,256,256,256,256,256,256,128,256,256,256,256,256,64,256,256] ; AVX512F-NEXT: vpsrlw $8, %ymm4, %ymm4 -; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm4, %ymm4 # [37,32,57,205,117,171,79,147,101,171,41,79,19,37,27,137] +; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm4, %ymm4 # [37,0,32,0,57,0,205,0,117,0,171,0,79,0,147,0,101,0,171,0,41,0,79,0,19,0,37,0,27,0,137,0] ; AVX512F-NEXT: vpsrlw $8, %ymm4, %ymm4 ; AVX512F-NEXT: vpackuswb %ymm3, %ymm4, %ymm3 ; AVX512F-NEXT: vpsubb %ymm3, %ymm0, %ymm0 ; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm4 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] -; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm4, %ymm4 # [0,0,0,0,0,0,128,0,128,0,0,0,0,0,0,0] +; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm4, %ymm4 # [0,0,0,0,0,0,0,0,0,0,0,0,128,0,0,0,128,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0] ; AVX512F-NEXT: vpsrlw $8, %ymm4, %ymm4 ; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] -; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [128,0,0,0,128,0,0,0,128,0,0,0,0,0,128,0] +; AVX512F-NEXT: 
vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [128,0,0,0,0,0,0,0,128,0,0,0,0,0,0,0,128,0,0,0,0,0,0,0,0,0,0,0,128,0,0,0] ; AVX512F-NEXT: vpsrlw $8, %ymm0, %ymm0 ; AVX512F-NEXT: vpackuswb %ymm4, %ymm0, %ymm0 ; AVX512F-NEXT: vpaddb %ymm3, %ymm0, %ymm0 @@ -259,20 +259,20 @@ define <64 x i8> @test_divconstant_64i8(<64 x i8> %a) nounwind { ; AVX512BW-NEXT: vpunpckhbw {{.*#+}} zmm2 = zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31],zmm0[40],zmm1[40],zmm0[41],zmm1[41],zmm0[42],zmm1[42],zmm0[43],zmm1[43],zmm0[44],zmm1[44],zmm0[45],zmm1[45],zmm0[46],zmm1[46],zmm0[47],zmm1[47],zmm0[56],zmm1[56],zmm0[57],zmm1[57],zmm0[58],zmm1[58],zmm0[59],zmm1[59],zmm0[60],zmm1[60],zmm0[61],zmm1[61],zmm0[62],zmm1[62],zmm0[63],zmm1[63] ; AVX512BW-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm2 ; AVX512BW-NEXT: vpsrlw $8, %zmm2, %zmm2 -; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm2 # [137,16,241,57,27,205,135,187,9,8,249,241,235,57,111,27,137,27,37,19,79,41,171,101,147,79,171,117,205,57,32,37] +; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm2 # [137,0,16,0,241,0,57,0,27,0,205,0,135,0,187,0,9,0,8,0,249,0,241,0,235,0,57,0,111,0,27,0,137,0,27,0,37,0,19,0,79,0,41,0,171,0,101,0,147,0,79,0,171,0,117,0,205,0,57,0,32,0,37,0] ; AVX512BW-NEXT: vpsrlw $8, %zmm2, %zmm2 ; AVX512BW-NEXT: vpunpcklbw {{.*#+}} zmm3 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[32],zmm1[32],zmm0[33],zmm1[33],zmm0[34],zmm1[34],zmm0[35],zmm1[35],zmm0[36],zmm1[36],zmm0[37],zmm1[37],zmm0[38],zmm1[38],zmm0[39],zmm1[39],zmm0[48],zmm1[48],zmm0[49],zmm1[49],zmm0[50],zmm1[50],zmm0[51],zmm1[51],zmm0[52],zmm1[52],zmm0[53],zmm1[53],zmm0[54],zmm1[54],zmm0[55],zmm1[55] ; AVX512BW-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm3, %zmm3 ; AVX512BW-NEXT: vpsrlw $8, %zmm3, %zmm3 -; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm3, %zmm3 # [37,32,57,205,117,171,79,147,101,171,41,79,19,37,27,137,27,111,57,235,241,249,8,9,187,135,205,27,57,241,16,137] +; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm3, %zmm3 # [37,0,32,0,57,0,205,0,117,0,171,0,79,0,147,0,101,0,171,0,41,0,79,0,19,0,37,0,27,0,137,0,27,0,111,0,57,0,235,0,241,0,249,0,8,0,9,0,187,0,135,0,205,0,27,0,57,0,241,0,16,0,137,0] ; AVX512BW-NEXT: vpsrlw $8, %zmm3, %zmm3 ; AVX512BW-NEXT: vpackuswb %zmm2, %zmm3, %zmm2 ; AVX512BW-NEXT: vpsubb %zmm2, %zmm0, %zmm0 ; AVX512BW-NEXT: vpunpckhbw {{.*#+}} zmm3 = zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31],zmm0[40],zmm1[40],zmm0[41],zmm1[41],zmm0[42],zmm1[42],zmm0[43],zmm1[43],zmm0[44],zmm1[44],zmm0[45],zmm1[45],zmm0[46],zmm1[46],zmm0[47],zmm1[47],zmm0[56],zmm1[56],zmm0[57],zmm1[57],zmm0[58],zmm1[58],zmm0[59],zmm1[59],zmm0[60],zmm1[60],zmm0[61],zmm1[61],zmm0[62],zmm1[62],zmm0[63],zmm1[63] -; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm3, %zmm3 # 
[0,0,0,0,0,0,128,0,128,0,0,0,0,0,0,0,0,128,0,0,0,0,0,128,0,0,0,128,0,0,0,128] +; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm3, %zmm3 # [0,0,0,0,0,0,0,0,0,0,0,0,128,0,0,0,128,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,128,0,0,0,0,0,0,0,0,0,0,0,128,0,0,0,0,0,0,0,128,0,0,0,0,0,0,0,128,0] ; AVX512BW-NEXT: vpsrlw $8, %zmm3, %zmm3 ; AVX512BW-NEXT: vpunpcklbw {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[32],zmm1[32],zmm0[33],zmm1[33],zmm0[34],zmm1[34],zmm0[35],zmm1[35],zmm0[36],zmm1[36],zmm0[37],zmm1[37],zmm0[38],zmm1[38],zmm0[39],zmm1[39],zmm0[48],zmm1[48],zmm0[49],zmm1[49],zmm0[50],zmm1[50],zmm0[51],zmm1[51],zmm0[52],zmm1[52],zmm0[53],zmm1[53],zmm0[54],zmm1[54],zmm0[55],zmm1[55] -; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0 # [128,0,0,0,128,0,0,0,128,0,0,0,0,0,128,0,0,0,0,0,0,0,0,128,0,128,0,0,0,0,0,0] +; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0 # [128,0,0,0,0,0,0,0,128,0,0,0,0,0,0,0,128,0,0,0,0,0,0,0,0,0,0,0,128,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,128,0,0,0,128,0,0,0,0,0,0,0,0,0,0,0,0,0] ; AVX512BW-NEXT: vpsrlw $8, %zmm0, %zmm0 ; AVX512BW-NEXT: vpackuswb %zmm3, %zmm0, %zmm0 ; AVX512BW-NEXT: vpaddb %zmm2, %zmm0, %zmm0 @@ -473,7 +473,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind { ; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1 ; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15],ymm1[24],ymm2[24],ymm1[25],ymm2[25],ymm1[26],ymm2[26],ymm1[27],ymm2[27],ymm1[28],ymm2[28],ymm1[29],ymm2[29],ymm1[30],ymm2[30],ymm1[31],ymm2[31] -; AVX512F-NEXT: vpbroadcastw {{.*#+}} ymm4 = [37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37] +; AVX512F-NEXT: vpbroadcastw {{.*#+}} ymm4 = [37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0] ; AVX512F-NEXT: vpmullw %ymm4, %ymm3, %ymm3 ; AVX512F-NEXT: vpsrlw $8, %ymm3, %ymm3 ; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm5 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[16],ymm2[16],ymm1[17],ymm2[17],ymm1[18],ymm2[18],ymm1[19],ymm2[19],ymm1[20],ymm2[20],ymm1[21],ymm2[21],ymm1[22],ymm2[22],ymm1[23],ymm2[23] @@ -517,7 +517,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind { ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512BW-NEXT: vpunpckhbw {{.*#+}} zmm2 = zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31],zmm0[40],zmm1[40],zmm0[41],zmm1[41],zmm0[42],zmm1[42],zmm0[43],zmm1[43],zmm0[44],zmm1[44],zmm0[45],zmm1[45],zmm0[46],zmm1[46],zmm0[47],zmm1[47],zmm0[56],zmm1[56],zmm0[57],zmm1[57],zmm0[58],zmm1[58],zmm0[59],zmm1[59],zmm0[60],zmm1[60],zmm0[61],zmm1[61],zmm0[62],zmm1[62],zmm0[63],zmm1[63] -; AVX512BW-NEXT: vpbroadcastw {{.*#+}} zmm3 = [37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37] +; AVX512BW-NEXT: vpbroadcastw {{.*#+}} zmm3 = 
[37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0] ; AVX512BW-NEXT: vpmullw %zmm3, %zmm2, %zmm2 ; AVX512BW-NEXT: vpsrlw $8, %zmm2, %zmm2 ; AVX512BW-NEXT: vpunpcklbw {{.*#+}} zmm1 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[32],zmm1[32],zmm0[33],zmm1[33],zmm0[34],zmm1[34],zmm0[35],zmm1[35],zmm0[36],zmm1[36],zmm0[37],zmm1[37],zmm0[38],zmm1[38],zmm0[39],zmm1[39],zmm0[48],zmm1[48],zmm0[49],zmm1[49],zmm0[50],zmm1[50],zmm0[51],zmm1[51],zmm0[52],zmm1[52],zmm0[53],zmm1[53],zmm0[54],zmm1[54],zmm0[55],zmm1[55] @@ -551,20 +551,20 @@ define <64 x i8> @test_remconstant_64i8(<64 x i8> %a) nounwind { ; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm2[8],ymm1[8],ymm2[9],ymm1[9],ymm2[10],ymm1[10],ymm2[11],ymm1[11],ymm2[12],ymm1[12],ymm2[13],ymm1[13],ymm2[14],ymm1[14],ymm2[15],ymm1[15],ymm2[24],ymm1[24],ymm2[25],ymm1[25],ymm2[26],ymm1[26],ymm2[27],ymm1[27],ymm2[28],ymm1[28],ymm2[29],ymm1[29],ymm2[30],ymm1[30],ymm2[31],ymm1[31] ; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 # [256,256,64,256,256,256,256,256,128,256,256,256,256,256,256,256] ; AVX512F-NEXT: vpsrlw $8, %ymm3, %ymm3 -; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 # [137,27,37,19,79,41,171,101,147,79,171,117,205,57,32,37] +; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 # [137,0,27,0,37,0,19,0,79,0,41,0,171,0,101,0,147,0,79,0,171,0,117,0,205,0,57,0,32,0,37,0] ; AVX512F-NEXT: vpsrlw $8, %ymm3, %ymm3 ; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm4 = ymm2[0],ymm1[0],ymm2[1],ymm1[1],ymm2[2],ymm1[2],ymm2[3],ymm1[3],ymm2[4],ymm1[4],ymm2[5],ymm1[5],ymm2[6],ymm1[6],ymm2[7],ymm1[7],ymm2[16],ymm1[16],ymm2[17],ymm1[17],ymm2[18],ymm1[18],ymm2[19],ymm1[19],ymm2[20],ymm1[20],ymm2[21],ymm1[21],ymm2[22],ymm1[22],ymm2[23],ymm1[23] ; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm4, %ymm4 # [256,256,256,256,256,256,256,256,128,256,256,256,256,256,256,256] ; AVX512F-NEXT: vpsrlw $8, %ymm4, %ymm4 -; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm4, %ymm4 # [27,111,57,235,241,249,8,9,187,135,205,27,57,241,16,137] +; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm4, %ymm4 # [27,0,111,0,57,0,235,0,241,0,249,0,8,0,9,0,187,0,135,0,205,0,27,0,57,0,241,0,16,0,137,0] ; AVX512F-NEXT: vpsrlw $8, %ymm4, %ymm4 ; AVX512F-NEXT: vpackuswb %ymm3, %ymm4, %ymm3 ; AVX512F-NEXT: vpsubb %ymm3, %ymm2, %ymm4 ; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm5 = ymm4[8],ymm1[8],ymm4[9],ymm1[9],ymm4[10],ymm1[10],ymm4[11],ymm1[11],ymm4[12],ymm1[12],ymm4[13],ymm1[13],ymm4[14],ymm1[14],ymm4[15],ymm1[15],ymm4[24],ymm1[24],ymm4[25],ymm1[25],ymm4[26],ymm1[26],ymm4[27],ymm1[27],ymm4[28],ymm1[28],ymm4[29],ymm1[29],ymm4[30],ymm1[30],ymm4[31],ymm1[31] -; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm5, %ymm5 # [0,128,0,0,0,0,0,128,0,0,0,128,0,0,0,128] +; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm5, %ymm5 # [0,0,128,0,0,0,0,0,0,0,0,0,0,0,128,0,0,0,0,0,0,0,128,0,0,0,0,0,0,0,128,0] ; AVX512F-NEXT: vpsrlw $8, %ymm5, %ymm5 ; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm4 = 
ymm4[0],ymm1[0],ymm4[1],ymm1[1],ymm4[2],ymm1[2],ymm4[3],ymm1[3],ymm4[4],ymm1[4],ymm4[5],ymm1[5],ymm4[6],ymm1[6],ymm4[7],ymm1[7],ymm4[16],ymm1[16],ymm4[17],ymm1[17],ymm4[18],ymm1[18],ymm4[19],ymm1[19],ymm4[20],ymm1[20],ymm4[21],ymm1[21],ymm4[22],ymm1[22],ymm4[23],ymm1[23] -; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm4, %ymm4 # [0,0,0,0,0,0,0,128,0,128,0,0,0,0,0,0] +; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm4, %ymm4 # [0,0,0,0,0,0,0,0,0,0,0,0,0,0,128,0,0,0,128,0,0,0,0,0,0,0,0,0,0,0,0,0] ; AVX512F-NEXT: vpsrlw $8, %ymm4, %ymm4 ; AVX512F-NEXT: vpackuswb %ymm5, %ymm4, %ymm4 ; AVX512F-NEXT: vpaddb %ymm3, %ymm4, %ymm3 @@ -585,20 +585,20 @@ define <64 x i8> @test_remconstant_64i8(<64 x i8> %a) nounwind { ; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm4 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] ; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm4, %ymm4 # [256,256,256,256,256,256,256,128,256,256,256,256,256,256,256,256] ; AVX512F-NEXT: vpsrlw $8, %ymm4, %ymm4 -; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm4, %ymm4 # [137,16,241,57,27,205,135,187,9,8,249,241,235,57,111,27] +; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm4, %ymm4 # [137,0,16,0,241,0,57,0,27,0,205,0,135,0,187,0,9,0,8,0,249,0,241,0,235,0,57,0,111,0,27,0] ; AVX512F-NEXT: vpsrlw $8, %ymm4, %ymm4 ; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm5 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] ; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm5, %ymm5 # [256,256,256,256,256,256,256,128,256,256,256,256,256,64,256,256] ; AVX512F-NEXT: vpsrlw $8, %ymm5, %ymm5 -; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm5, %ymm5 # [37,32,57,205,117,171,79,147,101,171,41,79,19,37,27,137] +; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm5, %ymm5 # [37,0,32,0,57,0,205,0,117,0,171,0,79,0,147,0,101,0,171,0,41,0,79,0,19,0,37,0,27,0,137,0] ; AVX512F-NEXT: vpsrlw $8, %ymm5, %ymm5 ; AVX512F-NEXT: vpackuswb %ymm4, %ymm5, %ymm4 ; AVX512F-NEXT: vpsubb %ymm4, %ymm0, %ymm5 ; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm6 = ymm5[8],ymm1[8],ymm5[9],ymm1[9],ymm5[10],ymm1[10],ymm5[11],ymm1[11],ymm5[12],ymm1[12],ymm5[13],ymm1[13],ymm5[14],ymm1[14],ymm5[15],ymm1[15],ymm5[24],ymm1[24],ymm5[25],ymm1[25],ymm5[26],ymm1[26],ymm5[27],ymm1[27],ymm5[28],ymm1[28],ymm5[29],ymm1[29],ymm5[30],ymm1[30],ymm5[31],ymm1[31] -; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm6, %ymm6 # [0,0,0,0,0,0,128,0,128,0,0,0,0,0,0,0] +; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm6, %ymm6 # [0,0,0,0,0,0,0,0,0,0,0,0,128,0,0,0,128,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0] ; AVX512F-NEXT: vpsrlw $8, %ymm6, %ymm6 ; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm5 = ymm5[0],ymm1[0],ymm5[1],ymm1[1],ymm5[2],ymm1[2],ymm5[3],ymm1[3],ymm5[4],ymm1[4],ymm5[5],ymm1[5],ymm5[6],ymm1[6],ymm5[7],ymm1[7],ymm5[16],ymm1[16],ymm5[17],ymm1[17],ymm5[18],ymm1[18],ymm5[19],ymm1[19],ymm5[20],ymm1[20],ymm5[21],ymm1[21],ymm5[22],ymm1[22],ymm5[23],ymm1[23] -; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm5, %ymm5 # [128,0,0,0,128,0,0,0,128,0,0,0,0,0,128,0] +; AVX512F-NEXT: 
vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm5, %ymm5 # [128,0,0,0,0,0,0,0,128,0,0,0,0,0,0,0,128,0,0,0,0,0,0,0,0,0,0,0,128,0,0,0] ; AVX512F-NEXT: vpsrlw $8, %ymm5, %ymm5 ; AVX512F-NEXT: vpackuswb %ymm6, %ymm5, %ymm5 ; AVX512F-NEXT: vpaddb %ymm4, %ymm5, %ymm4 @@ -624,20 +624,20 @@ define <64 x i8> @test_remconstant_64i8(<64 x i8> %a) nounwind { ; AVX512BW-NEXT: vpunpckhbw {{.*#+}} zmm2 = zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31],zmm0[40],zmm1[40],zmm0[41],zmm1[41],zmm0[42],zmm1[42],zmm0[43],zmm1[43],zmm0[44],zmm1[44],zmm0[45],zmm1[45],zmm0[46],zmm1[46],zmm0[47],zmm1[47],zmm0[56],zmm1[56],zmm0[57],zmm1[57],zmm0[58],zmm1[58],zmm0[59],zmm1[59],zmm0[60],zmm1[60],zmm0[61],zmm1[61],zmm0[62],zmm1[62],zmm0[63],zmm1[63] ; AVX512BW-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm2 ; AVX512BW-NEXT: vpsrlw $8, %zmm2, %zmm2 -; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm2 # [137,16,241,57,27,205,135,187,9,8,249,241,235,57,111,27,137,27,37,19,79,41,171,101,147,79,171,117,205,57,32,37] +; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm2 # [137,0,16,0,241,0,57,0,27,0,205,0,135,0,187,0,9,0,8,0,249,0,241,0,235,0,57,0,111,0,27,0,137,0,27,0,37,0,19,0,79,0,41,0,171,0,101,0,147,0,79,0,171,0,117,0,205,0,57,0,32,0,37,0] ; AVX512BW-NEXT: vpsrlw $8, %zmm2, %zmm2 ; AVX512BW-NEXT: vpunpcklbw {{.*#+}} zmm3 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[32],zmm1[32],zmm0[33],zmm1[33],zmm0[34],zmm1[34],zmm0[35],zmm1[35],zmm0[36],zmm1[36],zmm0[37],zmm1[37],zmm0[38],zmm1[38],zmm0[39],zmm1[39],zmm0[48],zmm1[48],zmm0[49],zmm1[49],zmm0[50],zmm1[50],zmm0[51],zmm1[51],zmm0[52],zmm1[52],zmm0[53],zmm1[53],zmm0[54],zmm1[54],zmm0[55],zmm1[55] ; AVX512BW-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm3, %zmm3 ; AVX512BW-NEXT: vpsrlw $8, %zmm3, %zmm3 -; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm3, %zmm3 # [37,32,57,205,117,171,79,147,101,171,41,79,19,37,27,137,27,111,57,235,241,249,8,9,187,135,205,27,57,241,16,137] +; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm3, %zmm3 # [37,0,32,0,57,0,205,0,117,0,171,0,79,0,147,0,101,0,171,0,41,0,79,0,19,0,37,0,27,0,137,0,27,0,111,0,57,0,235,0,241,0,249,0,8,0,9,0,187,0,135,0,205,0,27,0,57,0,241,0,16,0,137,0] ; AVX512BW-NEXT: vpsrlw $8, %zmm3, %zmm3 ; AVX512BW-NEXT: vpackuswb %zmm2, %zmm3, %zmm2 ; AVX512BW-NEXT: vpsubb %zmm2, %zmm0, %zmm3 ; AVX512BW-NEXT: vpunpckhbw {{.*#+}} zmm4 = zmm3[8],zmm1[8],zmm3[9],zmm1[9],zmm3[10],zmm1[10],zmm3[11],zmm1[11],zmm3[12],zmm1[12],zmm3[13],zmm1[13],zmm3[14],zmm1[14],zmm3[15],zmm1[15],zmm3[24],zmm1[24],zmm3[25],zmm1[25],zmm3[26],zmm1[26],zmm3[27],zmm1[27],zmm3[28],zmm1[28],zmm3[29],zmm1[29],zmm3[30],zmm1[30],zmm3[31],zmm1[31],zmm3[40],zmm1[40],zmm3[41],zmm1[41],zmm3[42],zmm1[42],zmm3[43],zmm1[43],zmm3[44],zmm1[44],zmm3[45],zmm1[45],zmm3[46],zmm1[46],zmm3[47],zmm1[47],zmm3[56],zmm1[56],zmm3[57],zmm1[57],zmm3[58],zmm1[58],zmm3[59],zmm1[59],zmm3[60],zmm1[60],zmm3[61],zmm1[61],zmm3[62],zmm1[62],zmm3[63],zmm1[63] -; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm4, %zmm4 # 
[0,0,0,0,0,0,128,0,128,0,0,0,0,0,0,0,0,128,0,0,0,0,0,128,0,0,0,128,0,0,0,128] +; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm4, %zmm4 # [0,0,0,0,0,0,0,0,0,0,0,0,128,0,0,0,128,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,128,0,0,0,0,0,0,0,0,0,0,0,128,0,0,0,0,0,0,0,128,0,0,0,0,0,0,0,128,0] ; AVX512BW-NEXT: vpsrlw $8, %zmm4, %zmm4 ; AVX512BW-NEXT: vpunpcklbw {{.*#+}} zmm3 = zmm3[0],zmm1[0],zmm3[1],zmm1[1],zmm3[2],zmm1[2],zmm3[3],zmm1[3],zmm3[4],zmm1[4],zmm3[5],zmm1[5],zmm3[6],zmm1[6],zmm3[7],zmm1[7],zmm3[16],zmm1[16],zmm3[17],zmm1[17],zmm3[18],zmm1[18],zmm3[19],zmm1[19],zmm3[20],zmm1[20],zmm3[21],zmm1[21],zmm3[22],zmm1[22],zmm3[23],zmm1[23],zmm3[32],zmm1[32],zmm3[33],zmm1[33],zmm3[34],zmm1[34],zmm3[35],zmm1[35],zmm3[36],zmm1[36],zmm3[37],zmm1[37],zmm3[38],zmm1[38],zmm3[39],zmm1[39],zmm3[48],zmm1[48],zmm3[49],zmm1[49],zmm3[50],zmm1[50],zmm3[51],zmm1[51],zmm3[52],zmm1[52],zmm3[53],zmm1[53],zmm3[54],zmm1[54],zmm3[55],zmm1[55] -; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm3, %zmm3 # [128,0,0,0,128,0,0,0,128,0,0,0,0,0,128,0,0,0,0,0,0,0,0,128,0,128,0,0,0,0,0,0] +; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm3, %zmm3 # [128,0,0,0,0,0,0,0,128,0,0,0,0,0,0,0,128,0,0,0,0,0,0,0,0,0,0,0,128,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,128,0,0,0,128,0,0,0,0,0,0,0,0,0,0,0,0,0] ; AVX512BW-NEXT: vpsrlw $8, %zmm3, %zmm3 ; AVX512BW-NEXT: vpackuswb %zmm4, %zmm3, %zmm3 ; AVX512BW-NEXT: vpaddb %zmm2, %zmm3, %zmm2 diff --git a/llvm/test/CodeGen/X86/vector-mul.ll b/llvm/test/CodeGen/X86/vector-mul.ll index 6e1bf25..d0bb90c 100644 --- a/llvm/test/CodeGen/X86/vector-mul.ll +++ b/llvm/test/CodeGen/X86/vector-mul.ll @@ -130,31 +130,31 @@ define <4 x i32> @mul_v4i32_1_2_4_8(<4 x i32> %a0) nounwind { ; X86-SSE2-LABEL: mul_v4i32_1_2_4_8: ; X86-SSE2: # %bb.0: ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] -; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 +; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [1,2,4,8] ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 +; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 # [2,u,8,u] ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] ; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; X86-SSE2-NEXT: retl ; ; X86-SSE4-LABEL: mul_v4i32_1_2_4_8: ; X86-SSE4: # %bb.0: -; X86-SSE4-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 +; X86-SSE4-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [1,2,4,8] ; X86-SSE4-NEXT: retl ; ; X64-SSE2-LABEL: mul_v4i32_1_2_4_8: ; X64-SSE2: # %bb.0: ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] -; X64-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; X64-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [1,2,4,8] ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; X64-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; X64-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [2,u,8,u] ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] ; X64-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; X64-SSE2-NEXT: retq ; ; X64-SSE4-LABEL: mul_v4i32_1_2_4_8: ; X64-SSE4: # %bb.0: -; X64-SSE4-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; X64-SSE4-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [1,2,4,8] ; X64-SSE4-NEXT: retq ; ; X64-XOP-LABEL: mul_v4i32_1_2_4_8: @@ -190,12 +190,12 @@ define <4 x i32> @mul_v4i32_1_2_4_8_optsize(<4 x i32> %a0) nounwind optsize { ; ; X86-SSE4-LABEL: mul_v4i32_1_2_4_8_optsize: ; X86-SSE4: # %bb.0: -; X86-SSE4-NEXT: pmulld 
{{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 +; X86-SSE4-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [1,2,4,8] ; X86-SSE4-NEXT: retl ; ; X64-SSE4-LABEL: mul_v4i32_1_2_4_8_optsize: ; X64-SSE4: # %bb.0: -; X64-SSE4-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; X64-SSE4-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [1,2,4,8] ; X64-SSE4-NEXT: retq ; ; X64-XOP-LABEL: mul_v4i32_1_2_4_8_optsize: @@ -989,7 +989,7 @@ define <2 x i64> @mul_v2i64_17_65(<2 x i64> %a0) nounwind { ; ; X64-AVX512DQ-LABEL: mul_v2i64_17_65: ; X64-AVX512DQ: # %bb.0: -; X64-AVX512DQ-NEXT: vpmullq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX512DQ-NEXT: vpmullq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [17,65] ; X64-AVX512DQ-NEXT: retq %1 = mul <2 x i64> %a0, <i64 17, i64 65> ret <2 x i64> %1 @@ -999,36 +999,36 @@ define <4 x i32> @mul_v4i32_5_17_33_65(<4 x i32> %a0) nounwind { ; X86-SSE2-LABEL: mul_v4i32_5_17_33_65: ; X86-SSE2: # %bb.0: ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] -; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 +; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [5,17,33,65] ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 +; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 # [17,u,65,u] ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] ; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; X86-SSE2-NEXT: retl ; ; X86-SSE4-LABEL: mul_v4i32_5_17_33_65: ; X86-SSE4: # %bb.0: -; X86-SSE4-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 +; X86-SSE4-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [5,17,33,65] ; X86-SSE4-NEXT: retl ; ; X64-SSE2-LABEL: mul_v4i32_5_17_33_65: ; X64-SSE2: # %bb.0: ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] -; X64-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; X64-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [5,17,33,65] ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; X64-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; X64-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [17,u,65,u] ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] ; X64-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; X64-SSE2-NEXT: retq ; ; X64-SSE4-LABEL: mul_v4i32_5_17_33_65: ; X64-SSE4: # %bb.0: -; X64-SSE4-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; X64-SSE4-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [5,17,33,65] ; X64-SSE4-NEXT: retq ; ; X64-AVX-LABEL: mul_v4i32_5_17_33_65: ; X64-AVX: # %bb.0: -; X64-AVX-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [5,17,33,65] ; X64-AVX-NEXT: retq %1 = mul <4 x i32> %a0, <i32 5, i32 17, i32 33, i32 65> ret <4 x i32> %1 @@ -1384,7 +1384,7 @@ define <2 x i64> @mul_v2i64_15_63(<2 x i64> %a0) nounwind { ; ; X64-AVX512DQ-LABEL: mul_v2i64_15_63: ; X64-AVX512DQ: # %bb.0: -; X64-AVX512DQ-NEXT: vpmullq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX512DQ-NEXT: vpmullq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [15,63] ; X64-AVX512DQ-NEXT: retq %1 = mul <2 x i64> %a0, <i64 15, i64 63> ret <2 x i64> %1 @@ -1427,7 +1427,7 @@ define <2 x i64> @mul_v2i64_neg_15_63(<2 x i64> %a0) nounwind { ; X64-SSE2-NEXT: movdqa %xmm0, %xmm3 ; X64-SSE2-NEXT: psrlq $32, %xmm3 ; X64-SSE2-NEXT: pmuludq %xmm1, %xmm3 -; X64-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; X64-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [4294967295,4294967295] ; 
X64-SSE2-NEXT: paddq %xmm3, %xmm0 ; X64-SSE2-NEXT: psllq $32, %xmm0 ; X64-SSE2-NEXT: paddq %xmm2, %xmm0 @@ -1441,7 +1441,7 @@ define <2 x i64> @mul_v2i64_neg_15_63(<2 x i64> %a0) nounwind { ; X64-SSE4-NEXT: movdqa %xmm0, %xmm3 ; X64-SSE4-NEXT: psrlq $32, %xmm3 ; X64-SSE4-NEXT: pmuludq %xmm1, %xmm3 -; X64-SSE4-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; X64-SSE4-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [4294967295,4294967295] ; X64-SSE4-NEXT: paddq %xmm3, %xmm0 ; X64-SSE4-NEXT: psllq $32, %xmm0 ; X64-SSE4-NEXT: paddq %xmm2, %xmm0 @@ -1453,7 +1453,7 @@ define <2 x i64> @mul_v2i64_neg_15_63(<2 x i64> %a0) nounwind { ; X64-XOP-NEXT: vpmuludq %xmm1, %xmm0, %xmm2 ; X64-XOP-NEXT: vpsrlq $32, %xmm0, %xmm3 ; X64-XOP-NEXT: vpmuludq %xmm1, %xmm3, %xmm1 -; X64-XOP-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-XOP-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [4294967295,4294967295] ; X64-XOP-NEXT: vpaddq %xmm1, %xmm0, %xmm0 ; X64-XOP-NEXT: vpsllq $32, %xmm0, %xmm0 ; X64-XOP-NEXT: vpaddq %xmm0, %xmm2, %xmm0 @@ -1465,7 +1465,7 @@ define <2 x i64> @mul_v2i64_neg_15_63(<2 x i64> %a0) nounwind { ; X64-AVX2-NEXT: vpmuludq %xmm1, %xmm0, %xmm2 ; X64-AVX2-NEXT: vpsrlq $32, %xmm0, %xmm3 ; X64-AVX2-NEXT: vpmuludq %xmm1, %xmm3, %xmm1 -; X64-AVX2-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX2-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [4294967295,4294967295] ; X64-AVX2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 ; X64-AVX2-NEXT: vpsllq $32, %xmm0, %xmm0 ; X64-AVX2-NEXT: vpaddq %xmm0, %xmm2, %xmm0 @@ -1473,7 +1473,7 @@ define <2 x i64> @mul_v2i64_neg_15_63(<2 x i64> %a0) nounwind { ; ; X64-AVX512DQ-LABEL: mul_v2i64_neg_15_63: ; X64-AVX512DQ: # %bb.0: -; X64-AVX512DQ-NEXT: vpmullq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX512DQ-NEXT: vpmullq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [18446744073709551601,18446744073709551553] ; X64-AVX512DQ-NEXT: retq %1 = mul <2 x i64> %a0, <i64 -15, i64 -63> ret <2 x i64> %1 @@ -1516,7 +1516,7 @@ define <2 x i64> @mul_v2i64_neg_17_65(<2 x i64> %a0) nounwind { ; X64-SSE2-NEXT: movdqa %xmm0, %xmm3 ; X64-SSE2-NEXT: psrlq $32, %xmm3 ; X64-SSE2-NEXT: pmuludq %xmm1, %xmm3 -; X64-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; X64-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [4294967295,4294967295] ; X64-SSE2-NEXT: paddq %xmm3, %xmm0 ; X64-SSE2-NEXT: psllq $32, %xmm0 ; X64-SSE2-NEXT: paddq %xmm2, %xmm0 @@ -1530,7 +1530,7 @@ define <2 x i64> @mul_v2i64_neg_17_65(<2 x i64> %a0) nounwind { ; X64-SSE4-NEXT: movdqa %xmm0, %xmm3 ; X64-SSE4-NEXT: psrlq $32, %xmm3 ; X64-SSE4-NEXT: pmuludq %xmm1, %xmm3 -; X64-SSE4-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; X64-SSE4-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [4294967295,4294967295] ; X64-SSE4-NEXT: paddq %xmm3, %xmm0 ; X64-SSE4-NEXT: psllq $32, %xmm0 ; X64-SSE4-NEXT: paddq %xmm2, %xmm0 @@ -1542,7 +1542,7 @@ define <2 x i64> @mul_v2i64_neg_17_65(<2 x i64> %a0) nounwind { ; X64-XOP-NEXT: vpmuludq %xmm1, %xmm0, %xmm2 ; X64-XOP-NEXT: vpsrlq $32, %xmm0, %xmm3 ; X64-XOP-NEXT: vpmuludq %xmm1, %xmm3, %xmm1 -; X64-XOP-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-XOP-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [4294967295,4294967295] ; X64-XOP-NEXT: vpaddq %xmm1, %xmm0, %xmm0 ; X64-XOP-NEXT: vpsllq $32, %xmm0, %xmm0 ; X64-XOP-NEXT: vpaddq %xmm0, %xmm2, %xmm0 @@ -1554,7 +1554,7 @@ define <2 x i64> @mul_v2i64_neg_17_65(<2 x i64> %a0) nounwind { ; X64-AVX2-NEXT: 
vpmuludq %xmm1, %xmm0, %xmm2 ; X64-AVX2-NEXT: vpsrlq $32, %xmm0, %xmm3 ; X64-AVX2-NEXT: vpmuludq %xmm1, %xmm3, %xmm1 -; X64-AVX2-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX2-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [4294967295,4294967295] ; X64-AVX2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 ; X64-AVX2-NEXT: vpsllq $32, %xmm0, %xmm0 ; X64-AVX2-NEXT: vpaddq %xmm0, %xmm2, %xmm0 @@ -1562,7 +1562,7 @@ define <2 x i64> @mul_v2i64_neg_17_65(<2 x i64> %a0) nounwind { ; ; X64-AVX512DQ-LABEL: mul_v2i64_neg_17_65: ; X64-AVX512DQ: # %bb.0: -; X64-AVX512DQ-NEXT: vpmullq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX512DQ-NEXT: vpmullq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [18446744073709551599,18446744073709551551] ; X64-AVX512DQ-NEXT: retq %1 = mul <2 x i64> %a0, <i64 -17, i64 -65> ret <2 x i64> %1 @@ -1600,7 +1600,7 @@ define <2 x i64> @mul_v2i64_neg_0_1(<2 x i64> %a0) nounwind { ; X86-SSE2-NEXT: movdqa %xmm0, %xmm3 ; X86-SSE2-NEXT: psrlq $32, %xmm3 ; X86-SSE2-NEXT: pmuludq %xmm1, %xmm3 -; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 +; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [0,0,0,0,u,u,u,u,255,255,255,255,u,u,u,u] ; X86-SSE2-NEXT: paddq %xmm3, %xmm0 ; X86-SSE2-NEXT: psllq $32, %xmm0 ; X86-SSE2-NEXT: paddq %xmm2, %xmm0 @@ -1614,7 +1614,7 @@ define <2 x i64> @mul_v2i64_neg_0_1(<2 x i64> %a0) nounwind { ; X86-SSE4-NEXT: movdqa %xmm0, %xmm3 ; X86-SSE4-NEXT: psrlq $32, %xmm3 ; X86-SSE4-NEXT: pmuludq %xmm1, %xmm3 -; X86-SSE4-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 +; X86-SSE4-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [0,0,0,0,u,u,u,u,255,255,255,255,u,u,u,u] ; X86-SSE4-NEXT: paddq %xmm3, %xmm0 ; X86-SSE4-NEXT: psllq $32, %xmm0 ; X86-SSE4-NEXT: paddq %xmm2, %xmm0 @@ -1628,7 +1628,7 @@ define <2 x i64> @mul_v2i64_neg_0_1(<2 x i64> %a0) nounwind { ; X64-SSE2-NEXT: movdqa %xmm0, %xmm3 ; X64-SSE2-NEXT: psrlq $32, %xmm3 ; X64-SSE2-NEXT: pmuludq %xmm1, %xmm3 -; X64-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; X64-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [0,0,0,0,0,0,0,0,255,255,255,255,0,0,0,0] ; X64-SSE2-NEXT: paddq %xmm3, %xmm0 ; X64-SSE2-NEXT: psllq $32, %xmm0 ; X64-SSE2-NEXT: paddq %xmm2, %xmm0 @@ -1642,7 +1642,7 @@ define <2 x i64> @mul_v2i64_neg_0_1(<2 x i64> %a0) nounwind { ; X64-SSE4-NEXT: movdqa %xmm0, %xmm3 ; X64-SSE4-NEXT: psrlq $32, %xmm3 ; X64-SSE4-NEXT: pmuludq %xmm1, %xmm3 -; X64-SSE4-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; X64-SSE4-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [0,0,0,0,0,0,0,0,255,255,255,255,0,0,0,0] ; X64-SSE4-NEXT: paddq %xmm3, %xmm0 ; X64-SSE4-NEXT: psllq $32, %xmm0 ; X64-SSE4-NEXT: paddq %xmm2, %xmm0 @@ -1654,7 +1654,7 @@ define <2 x i64> @mul_v2i64_neg_0_1(<2 x i64> %a0) nounwind { ; X64-XOP-NEXT: vpmuludq %xmm1, %xmm0, %xmm2 ; X64-XOP-NEXT: vpsrlq $32, %xmm0, %xmm3 ; X64-XOP-NEXT: vpmuludq %xmm1, %xmm3, %xmm1 -; X64-XOP-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-XOP-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [0,0,0,0,0,0,0,0,255,255,255,255,0,0,0,0] ; X64-XOP-NEXT: vpaddq %xmm1, %xmm0, %xmm0 ; X64-XOP-NEXT: vpsllq $32, %xmm0, %xmm0 ; X64-XOP-NEXT: vpaddq %xmm0, %xmm2, %xmm0 @@ -1666,7 +1666,7 @@ define <2 x i64> @mul_v2i64_neg_0_1(<2 x i64> %a0) nounwind { ; X64-AVX2-NEXT: vpmuludq %xmm1, %xmm0, %xmm2 ; X64-AVX2-NEXT: vpsrlq $32, %xmm0, %xmm3 ; X64-AVX2-NEXT: vpmuludq %xmm1, %xmm3, %xmm1 -; X64-AVX2-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX2-NEXT: 
vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [0,0,0,0,0,0,0,0,255,255,255,255,0,0,0,0] ; X64-AVX2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 ; X64-AVX2-NEXT: vpsllq $32, %xmm0, %xmm0 ; X64-AVX2-NEXT: vpaddq %xmm0, %xmm2, %xmm0 @@ -1674,7 +1674,7 @@ define <2 x i64> @mul_v2i64_neg_0_1(<2 x i64> %a0) nounwind { ; ; X64-AVX512DQ-LABEL: mul_v2i64_neg_0_1: ; X64-AVX512DQ: # %bb.0: -; X64-AVX512DQ-NEXT: vpmullq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX512DQ-NEXT: vpmullq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255] ; X64-AVX512DQ-NEXT: retq %1 = mul <2 x i64> %a0, <i64 0, i64 -1> ret <2 x i64> %1 @@ -1689,7 +1689,7 @@ define <2 x i64> @mul_v2i64_15_neg_63(<2 x i64> %a0) nounwind { ; X86-SSE2-NEXT: movdqa %xmm0, %xmm3 ; X86-SSE2-NEXT: psrlq $32, %xmm3 ; X86-SSE2-NEXT: pmuludq %xmm1, %xmm3 -; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 +; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [0,0,0,0,u,u,u,u,255,255,255,255,u,u,u,u] ; X86-SSE2-NEXT: paddq %xmm3, %xmm0 ; X86-SSE2-NEXT: psllq $32, %xmm0 ; X86-SSE2-NEXT: paddq %xmm2, %xmm0 @@ -1703,7 +1703,7 @@ define <2 x i64> @mul_v2i64_15_neg_63(<2 x i64> %a0) nounwind { ; X86-SSE4-NEXT: movdqa %xmm0, %xmm3 ; X86-SSE4-NEXT: psrlq $32, %xmm3 ; X86-SSE4-NEXT: pmuludq %xmm1, %xmm3 -; X86-SSE4-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 +; X86-SSE4-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [0,0,0,0,u,u,u,u,255,255,255,255,u,u,u,u] ; X86-SSE4-NEXT: paddq %xmm3, %xmm0 ; X86-SSE4-NEXT: psllq $32, %xmm0 ; X86-SSE4-NEXT: paddq %xmm2, %xmm0 @@ -1717,7 +1717,7 @@ define <2 x i64> @mul_v2i64_15_neg_63(<2 x i64> %a0) nounwind { ; X64-SSE2-NEXT: movdqa %xmm0, %xmm3 ; X64-SSE2-NEXT: psrlq $32, %xmm3 ; X64-SSE2-NEXT: pmuludq %xmm1, %xmm3 -; X64-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; X64-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [0,0,0,0,0,0,0,0,255,255,255,255,0,0,0,0] ; X64-SSE2-NEXT: paddq %xmm3, %xmm0 ; X64-SSE2-NEXT: psllq $32, %xmm0 ; X64-SSE2-NEXT: paddq %xmm2, %xmm0 @@ -1731,7 +1731,7 @@ define <2 x i64> @mul_v2i64_15_neg_63(<2 x i64> %a0) nounwind { ; X64-SSE4-NEXT: movdqa %xmm0, %xmm3 ; X64-SSE4-NEXT: psrlq $32, %xmm3 ; X64-SSE4-NEXT: pmuludq %xmm1, %xmm3 -; X64-SSE4-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; X64-SSE4-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [0,0,0,0,0,0,0,0,255,255,255,255,0,0,0,0] ; X64-SSE4-NEXT: paddq %xmm3, %xmm0 ; X64-SSE4-NEXT: psllq $32, %xmm0 ; X64-SSE4-NEXT: paddq %xmm2, %xmm0 @@ -1743,7 +1743,7 @@ define <2 x i64> @mul_v2i64_15_neg_63(<2 x i64> %a0) nounwind { ; X64-XOP-NEXT: vpmuludq %xmm1, %xmm0, %xmm2 ; X64-XOP-NEXT: vpsrlq $32, %xmm0, %xmm3 ; X64-XOP-NEXT: vpmuludq %xmm1, %xmm3, %xmm1 -; X64-XOP-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-XOP-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [0,0,0,0,0,0,0,0,255,255,255,255,0,0,0,0] ; X64-XOP-NEXT: vpaddq %xmm1, %xmm0, %xmm0 ; X64-XOP-NEXT: vpsllq $32, %xmm0, %xmm0 ; X64-XOP-NEXT: vpaddq %xmm0, %xmm2, %xmm0 @@ -1755,7 +1755,7 @@ define <2 x i64> @mul_v2i64_15_neg_63(<2 x i64> %a0) nounwind { ; X64-AVX2-NEXT: vpmuludq %xmm1, %xmm0, %xmm2 ; X64-AVX2-NEXT: vpsrlq $32, %xmm0, %xmm3 ; X64-AVX2-NEXT: vpmuludq %xmm1, %xmm3, %xmm1 -; X64-AVX2-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX2-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [0,0,0,0,0,0,0,0,255,255,255,255,0,0,0,0] ; X64-AVX2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 ; X64-AVX2-NEXT: vpsllq $32, %xmm0, 
%xmm0 ; X64-AVX2-NEXT: vpaddq %xmm0, %xmm2, %xmm0 @@ -1763,7 +1763,7 @@ define <2 x i64> @mul_v2i64_15_neg_63(<2 x i64> %a0) nounwind { ; ; X64-AVX512DQ-LABEL: mul_v2i64_15_neg_63: ; X64-AVX512DQ: # %bb.0: -; X64-AVX512DQ-NEXT: vpmullq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX512DQ-NEXT: vpmullq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [15,18446744073709551553] ; X64-AVX512DQ-NEXT: retq %1 = mul <2 x i64> %a0, <i64 15, i64 -63> ret <2 x i64> %1 @@ -1773,36 +1773,36 @@ define <4 x i32> @mul_v4i32_0_15_31_7(<4 x i32> %a0) nounwind { ; X86-SSE2-LABEL: mul_v4i32_0_15_31_7: ; X86-SSE2: # %bb.0: ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] -; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 +; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [0,15,31,7] ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 +; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 # [15,u,7,u] ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] ; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; X86-SSE2-NEXT: retl ; ; X86-SSE4-LABEL: mul_v4i32_0_15_31_7: ; X86-SSE4: # %bb.0: -; X86-SSE4-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 +; X86-SSE4-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [0,15,31,7] ; X86-SSE4-NEXT: retl ; ; X64-SSE2-LABEL: mul_v4i32_0_15_31_7: ; X64-SSE2: # %bb.0: ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] -; X64-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; X64-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [0,15,31,7] ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; X64-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; X64-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [15,u,7,u] ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] ; X64-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; X64-SSE2-NEXT: retq ; ; X64-SSE4-LABEL: mul_v4i32_0_15_31_7: ; X64-SSE4: # %bb.0: -; X64-SSE4-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; X64-SSE4-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [0,15,31,7] ; X64-SSE4-NEXT: retq ; ; X64-AVX-LABEL: mul_v4i32_0_15_31_7: ; X64-AVX: # %bb.0: -; X64-AVX-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [0,15,31,7] ; X64-AVX-NEXT: retq %1 = mul <4 x i32> %a0, <i32 0, i32 15, i32 31, i32 7> ret <4 x i32> %1 @@ -1947,7 +1947,7 @@ define <2 x i64> @mul_v2i64_68_132(<2 x i64> %x) nounwind { ; ; X64-AVX512DQ-LABEL: mul_v2i64_68_132: ; X64-AVX512DQ: # %bb.0: -; X64-AVX512DQ-NEXT: vpmullq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX512DQ-NEXT: vpmullq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [68,132] ; X64-AVX512DQ-NEXT: retq %mul = mul <2 x i64> %x, <i64 68, i64 132> ret <2 x i64> %mul @@ -2009,7 +2009,7 @@ define <2 x i64> @mul_v2i64_60_120(<2 x i64> %x) nounwind { ; ; X64-AVX512DQ-LABEL: mul_v2i64_60_120: ; X64-AVX512DQ: # %bb.0: -; X64-AVX512DQ-NEXT: vpmullq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX512DQ-NEXT: vpmullq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [60,124] ; X64-AVX512DQ-NEXT: retq %mul = mul <2 x i64> %x, <i64 60, i64 124> ret <2 x i64> %mul diff --git a/llvm/test/CodeGen/X86/vector-reduce-add-mask.ll b/llvm/test/CodeGen/X86/vector-reduce-add-mask.ll index 983ae59..3d85d55 100644 --- a/llvm/test/CodeGen/X86/vector-reduce-add-mask.ll +++ b/llvm/test/CodeGen/X86/vector-reduce-add-mask.ll @@ -851,7 +851,7 @@ define 
i16 @test_v4i16_v4i8(<4 x i16> %a0) { ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [0,65535,65535,65535,65535,65535,65535,65535] ; SSE2-NEXT: pandn %xmm0, %xmm1 -; SSE2-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE2-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [0,0,0,128,0,64,0,32,u,u,u,u,u,u,u,u] ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] ; SSE2-NEXT: paddw %xmm0, %xmm1 diff --git a/llvm/test/CodeGen/X86/vector-rotate-128.ll b/llvm/test/CodeGen/X86/vector-rotate-128.ll index 93f4ce7..0bf5a8d 100644 --- a/llvm/test/CodeGen/X86/vector-rotate-128.ll +++ b/llvm/test/CodeGen/X86/vector-rotate-128.ll @@ -1092,9 +1092,9 @@ define <4 x i32> @constant_rotate_v4i32(<4 x i32> %a) nounwind { ; SSE2-LABEL: constant_rotate_v4i32: ; SSE2: # %bb.0: ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] -; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [16,32,64,128] ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,3,2,3] -; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [32,u,128,u] ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,3,2,3] ; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] @@ -1106,8 +1106,8 @@ define <4 x i32> @constant_rotate_v4i32(<4 x i32> %a) nounwind { ; SSE41-LABEL: constant_rotate_v4i32: ; SSE41: # %bb.0: ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] -; SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 -; SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [32,u,128,u] +; SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [16,32,64,128] ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3] ; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7] ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2] @@ -1118,8 +1118,8 @@ define <4 x i32> @constant_rotate_v4i32(<4 x i32> %a) nounwind { ; AVX1-LABEL: constant_rotate_v4i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] -; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [32,u,128,u] +; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [16,32,64,128] ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,3,3] ; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7] ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2] @@ -1156,9 +1156,9 @@ define <4 x i32> @constant_rotate_v4i32(<4 x i32> %a) nounwind { ; X86-SSE2-LABEL: constant_rotate_v4i32: ; X86-SSE2: # %bb.0: ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] -; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 +; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [16,32,64,128] ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,3,2,3] -; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 +; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 # [32,u,128,u] ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,3,2,3] ; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] diff --git a/llvm/test/CodeGen/X86/vector-rotate-256.ll b/llvm/test/CodeGen/X86/vector-rotate-256.ll index 64c3118..5ae3e2f 100644 --- 
a/llvm/test/CodeGen/X86/vector-rotate-256.ll +++ b/llvm/test/CodeGen/X86/vector-rotate-256.ll @@ -895,13 +895,13 @@ define <8 x i32> @constant_rotate_v8i32(<8 x i32> %a) nounwind { ; AVX1-LABEL: constant_rotate_v8i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] -; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [32,u,128,u] ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 ; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm2[1,1,3,3] -; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 +; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 # [512,u,2048,u] ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1 -; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 +; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [16,32,64,128] +; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 # [256,512,1024,2048] ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 ; AVX1-NEXT: vmovshdup {{.*#+}} ymm2 = ymm0[1,1,3,3,5,5,7,7] ; AVX1-NEXT: vblendps {{.*#+}} ymm2 = ymm2[0],ymm1[1],ymm2[2],ymm1[3],ymm2[4],ymm1[5],ymm2[6],ymm1[7] diff --git a/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll b/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll index d565ef0..1602cde 100644 --- a/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll +++ b/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll @@ -1673,7 +1673,7 @@ define <16 x i8> @constant_shift_v16i8_pairs(<16 x i8> %a) nounwind { ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [65535,65535,65535,65535,65535,0,65535,65535] ; SSE2-NEXT: pandn %xmm0, %xmm1 -; SSE2-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE2-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [0,128,0,16,0,2,0,32,0,64,0,0,0,8,0,4] ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [64,64,8,8,1,1,16,16,32,32,128,128,4,4,2,2] @@ -1750,7 +1750,7 @@ define <16 x i8> @constant_shift_v16i8_pairs(<16 x i8> %a) nounwind { ; X86-SSE: # %bb.0: ; X86-SSE-NEXT: movdqa {{.*#+}} xmm1 = [65535,65535,65535,65535,65535,0,65535,65535] ; X86-SSE-NEXT: pandn %xmm0, %xmm1 -; X86-SSE-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 +; X86-SSE-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [0,128,0,16,0,2,0,32,0,64,0,0,0,8,0,4] ; X86-SSE-NEXT: por %xmm1, %xmm0 ; X86-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 ; X86-SSE-NEXT: movdqa {{.*#+}} xmm1 = [64,64,8,8,1,1,16,16,32,32,128,128,4,4,2,2] diff --git a/llvm/test/CodeGen/X86/vector-shift-lshr-128.ll b/llvm/test/CodeGen/X86/vector-shift-lshr-128.ll index 8cb2c7b..a847da6 100644 --- a/llvm/test/CodeGen/X86/vector-shift-lshr-128.ll +++ b/llvm/test/CodeGen/X86/vector-shift-lshr-128.ll @@ -1223,7 +1223,7 @@ define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) nounwind { ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [0,65535,65535,65535,65535,65535,65535,65535] ; SSE2-NEXT: pandn %xmm0, %xmm1 -; SSE2-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE2-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [0,0,0,128,0,64,0,32,0,16,0,8,0,4,0,2] ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; @@ -1275,7 +1275,7 @@ define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) nounwind { ; X86-SSE: # %bb.0: ; X86-SSE-NEXT: movdqa {{.*#+}} xmm1 = [0,65535,65535,65535,65535,65535,65535,65535] ; X86-SSE-NEXT: pandn %xmm0, %xmm1 -; X86-SSE-NEXT: pmulhuw 
{{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 +; X86-SSE-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [0,0,0,128,0,64,0,32,0,16,0,8,0,4,0,2] ; X86-SSE-NEXT: por %xmm1, %xmm0 ; X86-SSE-NEXT: retl %shift = lshr <8 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7> diff --git a/llvm/test/CodeGen/X86/vector-shift-lshr-sub128.ll b/llvm/test/CodeGen/X86/vector-shift-lshr-sub128.ll index 57874c4..eb39b6a 100644 --- a/llvm/test/CodeGen/X86/vector-shift-lshr-sub128.ll +++ b/llvm/test/CodeGen/X86/vector-shift-lshr-sub128.ll @@ -1480,7 +1480,7 @@ define <4 x i16> @constant_shift_v4i16(<4 x i16> %a) nounwind { ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [0,65535,65535,65535,65535,65535,65535,65535] ; SSE2-NEXT: pandn %xmm0, %xmm1 -; SSE2-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE2-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [0,0,0,128,0,64,0,32,u,u,u,u,u,u,u,u] ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; @@ -1532,7 +1532,7 @@ define <4 x i16> @constant_shift_v4i16(<4 x i16> %a) nounwind { ; X86-SSE: # %bb.0: ; X86-SSE-NEXT: movdqa {{.*#+}} xmm1 = [0,65535,65535,65535,65535,65535,65535,65535] ; X86-SSE-NEXT: pandn %xmm0, %xmm1 -; X86-SSE-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 +; X86-SSE-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [0,0,0,128,0,64,0,32,u,u,u,u,u,u,u,u] ; X86-SSE-NEXT: por %xmm1, %xmm0 ; X86-SSE-NEXT: retl %shift = lshr <4 x i16> %a, <i16 0, i16 1, i16 2, i16 3> diff --git a/llvm/test/CodeGen/X86/vector-shift-shl-128.ll b/llvm/test/CodeGen/X86/vector-shift-shl-128.ll index 99dac74..3085c32 100644 --- a/llvm/test/CodeGen/X86/vector-shift-shl-128.ll +++ b/llvm/test/CodeGen/X86/vector-shift-shl-128.ll @@ -987,21 +987,21 @@ define <4 x i32> @constant_shift_v4i32(<4 x i32> %a) nounwind { ; SSE2-LABEL: constant_shift_v4i32: ; SSE2: # %bb.0: ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] -; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [16,32,64,128] ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [32,u,128,u] ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; SSE2-NEXT: retq ; ; SSE41-LABEL: constant_shift_v4i32: ; SSE41: # %bb.0: -; SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [16,32,64,128] ; SSE41-NEXT: retq ; ; AVX1-LABEL: constant_shift_v4i32: ; AVX1: # %bb.0: -; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [16,32,64,128] ; AVX1-NEXT: retq ; ; AVX2-LABEL: constant_shift_v4i32: @@ -1032,9 +1032,9 @@ define <4 x i32> @constant_shift_v4i32(<4 x i32> %a) nounwind { ; X86-SSE-LABEL: constant_shift_v4i32: ; X86-SSE: # %bb.0: ; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] -; X86-SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 +; X86-SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [16,32,64,128] ; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; X86-SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 +; X86-SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 # [32,u,128,u] ; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] ; X86-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; X86-SSE-NEXT: retl diff --git a/llvm/test/CodeGen/X86/vector-shift-shl-256.ll 
b/llvm/test/CodeGen/X86/vector-shift-shl-256.ll index b56a8b5..f9ccd1e 100644 --- a/llvm/test/CodeGen/X86/vector-shift-shl-256.ll +++ b/llvm/test/CodeGen/X86/vector-shift-shl-256.ll @@ -1117,9 +1117,9 @@ define <4 x i64> @constant_shift_v4i64(<4 x i64> %a) nounwind { define <8 x i32> @constant_shift_v8i32(<8 x i32> %a) nounwind { ; AVX1-LABEL: constant_shift_v8i32: ; AVX1: # %bb.0: -; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 +; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 # [16,32,64,128] ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 -; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [256,512,256,128] ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; @@ -1153,9 +1153,9 @@ define <8 x i32> @constant_shift_v8i32(<8 x i32> %a) nounwind { ; ; X86-AVX1-LABEL: constant_shift_v8i32: ; X86-AVX1: # %bb.0: -; X86-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm1 +; X86-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm1 # [16,32,64,128] ; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 -; X86-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 +; X86-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 # [256,512,256,128] ; X86-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; X86-AVX1-NEXT: retl ; diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll index 0e20b18..18d79b6 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll @@ -778,9 +778,9 @@ define <16 x i8> @combine_shl_pshufb(<4 x i32> %a0) { ; SSSE3-LABEL: combine_shl_pshufb: ; SSSE3: # %bb.0: ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] -; SSSE3-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSSE3-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [1,256,65536,65536] ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; SSSE3-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; SSSE3-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [256,u,65536,u] ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[1,2,3,0,5,6,7,4,9,10,11,8,12,13,14,15] @@ -788,13 +788,13 @@ define <16 x i8> @combine_shl_pshufb(<4 x i32> %a0) { ; ; SSE41-LABEL: combine_shl_pshufb: ; SSE41: # %bb.0: -; SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [1,256,65536,65536] ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[1,2,3,0,5,6,7,4,9,10,11,8,12,13,14,15] ; SSE41-NEXT: retq ; ; AVX1-LABEL: combine_shl_pshufb: ; AVX1: # %bb.0: -; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [1,256,65536,65536] ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,2,3,0,5,6,7,4,9,10,11,8,12,13,14,15] ; AVX1-NEXT: retq ; diff --git a/llvm/test/CodeGen/X86/vector-trunc-math.ll b/llvm/test/CodeGen/X86/vector-trunc-math.ll index 1af7542..4235377 100644 --- a/llvm/test/CodeGen/X86/vector-trunc-math.ll +++ b/llvm/test/CodeGen/X86/vector-trunc-math.ll @@ -2110,7 +2110,7 @@ define <4 x i32> @trunc_mul_const_v4i64_v4i32(<4 x i64> %a0) nounwind { ; SSE: # %bb.0: ; SSE-NEXT: xorps %xmm2, %xmm2 ; SSE-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1] -; SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; SSE-NEXT: 
pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [2,3] ; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm1[0,2] ; SSE-NEXT: movaps %xmm2, %xmm0 ; SSE-NEXT: retq @@ -2119,7 +2119,7 @@ define <4 x i32> @trunc_mul_const_v4i64_v4i32(<4 x i64> %a0) nounwind { ; AVX1: # %bb.0: ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 ; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2] -; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [0,1,2,3] ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq ; @@ -2127,7 +2127,7 @@ define <4 x i32> @trunc_mul_const_v4i64_v4i32(<4 x i64> %a0) nounwind { ; AVX2-SLOW: # %bb.0: ; AVX2-SLOW-NEXT: vextractf128 $1, %ymm0, %xmm1 ; AVX2-SLOW-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2] -; AVX2-SLOW-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-SLOW-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [0,1,2,3] ; AVX2-SLOW-NEXT: vzeroupper ; AVX2-SLOW-NEXT: retq ; @@ -2135,7 +2135,7 @@ define <4 x i32> @trunc_mul_const_v4i64_v4i32(<4 x i64> %a0) nounwind { ; AVX2-FAST-ALL: # %bb.0: ; AVX2-FAST-ALL-NEXT: vpmovsxbd {{.*#+}} ymm1 = [0,2,4,6,0,0,0,0] ; AVX2-FAST-ALL-NEXT: vpermd %ymm0, %ymm1, %ymm0 -; AVX2-FAST-ALL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-FAST-ALL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [0,1,2,3] ; AVX2-FAST-ALL-NEXT: vzeroupper ; AVX2-FAST-ALL-NEXT: retq ; @@ -2143,7 +2143,7 @@ define <4 x i32> @trunc_mul_const_v4i64_v4i32(<4 x i64> %a0) nounwind { ; AVX2-FAST-PERLANE: # %bb.0: ; AVX2-FAST-PERLANE-NEXT: vextractf128 $1, %ymm0, %xmm1 ; AVX2-FAST-PERLANE-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2] -; AVX2-FAST-PERLANE-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-FAST-PERLANE-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [0,1,2,3] ; AVX2-FAST-PERLANE-NEXT: vzeroupper ; AVX2-FAST-PERLANE-NEXT: retq ; @@ -2151,7 +2151,7 @@ define <4 x i32> @trunc_mul_const_v4i64_v4i32(<4 x i64> %a0) nounwind { ; AVX512: # %bb.0: ; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512-NEXT: vpmovqd %zmm0, %ymm0 -; AVX512-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [0,1,2,3] ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq %1 = mul <4 x i64> %a0, <i64 0, i64 1, i64 2, i64 3> @@ -2253,13 +2253,13 @@ define <8 x i16> @trunc_mul_const_v8i32_v8i16(<8 x i32> %a0) nounwind { define <16 x i8> @trunc_mul_const_v16i64_v16i8(<16 x i64> %a0) nounwind { ; SSE-LABEL: trunc_mul_const_v16i64_v16i8: ; SSE: # %bb.0: -; SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 -; SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 -; SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 -; SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4 -; SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm5 -; SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm6 -; SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm7 +; SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [2,3] +; SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [4,5] +; SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 # [6,7] +; SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4 # [8,9] +; SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm5 # [10,11] +; SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm6 # [12,13] +; SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm7 # [14,15] ; SSE-NEXT: movdqa {{.*#+}} xmm8 = 
[255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0] ; SSE-NEXT: pand %xmm8, %xmm7 ; SSE-NEXT: pand %xmm8, %xmm6 @@ -2280,18 +2280,18 @@ define <16 x i8> @trunc_mul_const_v16i64_v16i8(<16 x i64> %a0) nounwind { ; ; AVX1-LABEL: trunc_mul_const_v16i64_v16i8: ; AVX1: # %bb.0: -; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm4 +; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm4 # [0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0] ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 -; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm5 +; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [2,3] +; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm5 # [4,5] ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1 -; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm6 +; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [6,7] +; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm6 # [8,9] ; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm2 -; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 -; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm7 +; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 # [10,11] +; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm7 # [12,13] ; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm3 -; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 +; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 # [14,15] ; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm8 = [255,255] ; AVX1-NEXT: vpand %xmm3, %xmm8, %xmm3 ; AVX1-NEXT: vpand %xmm7, %xmm8, %xmm7 @@ -2313,10 +2313,10 @@ define <16 x i8> @trunc_mul_const_v16i64_v16i8(<16 x i64> %a0) nounwind { ; ; AVX2-LABEL: trunc_mul_const_v16i64_v16i8: ; AVX2: # %bb.0: -; AVX2-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 -; AVX2-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 -; AVX2-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 -; AVX2-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 +; AVX2-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [0,1,2,3] +; AVX2-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 # [4,5,6,7] +; AVX2-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 # [8,9,10,11] +; AVX2-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 # [12,13,14,15] ; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm4 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0] ; AVX2-NEXT: vpand %ymm4, %ymm3, %ymm3 ; AVX2-NEXT: vpand %ymm4, %ymm2, %ymm2 @@ -2335,8 +2335,8 @@ define <16 x i8> @trunc_mul_const_v16i64_v16i8(<16 x i64> %a0) nounwind { ; ; AVX512F-LABEL: trunc_mul_const_v16i64_v16i8: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0 -; AVX512F-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1 +; AVX512F-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0 # [0,1,2,3,4,5,6,7] +; AVX512F-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1 # [8,9,10,11,12,13,14,15] ; AVX512F-NEXT: vpmovqb %zmm1, %xmm1 ; AVX512F-NEXT: vpmovqb %zmm0, %xmm0 ; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] @@ -2345,8 +2345,8 @@ define <16 x i8> @trunc_mul_const_v16i64_v16i8(<16 x i64> %a0) nounwind { ; ; AVX512BW-LABEL: trunc_mul_const_v16i64_v16i8: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), 
%zmm0, %zmm0 -; AVX512BW-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1 +; AVX512BW-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0 # [0,1,2,3,4,5,6,7] +; AVX512BW-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1 # [8,9,10,11,12,13,14,15] ; AVX512BW-NEXT: vpmovqb %zmm1, %xmm1 ; AVX512BW-NEXT: vpmovqb %zmm0, %xmm0 ; AVX512BW-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] @@ -2355,8 +2355,8 @@ define <16 x i8> @trunc_mul_const_v16i64_v16i8(<16 x i64> %a0) nounwind { ; ; AVX512DQ-LABEL: trunc_mul_const_v16i64_v16i8: ; AVX512DQ: # %bb.0: -; AVX512DQ-NEXT: vpmullq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0 -; AVX512DQ-NEXT: vpmullq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1 +; AVX512DQ-NEXT: vpmullq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0 # [0,1,2,3,4,5,6,7] +; AVX512DQ-NEXT: vpmullq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1 # [8,9,10,11,12,13,14,15] ; AVX512DQ-NEXT: vpmovqb %zmm1, %xmm1 ; AVX512DQ-NEXT: vpmovqb %zmm0, %xmm0 ; AVX512DQ-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] @@ -2371,27 +2371,27 @@ define <16 x i8> @trunc_mul_const_v16i32_v16i8(<16 x i32> %a0) nounwind { ; SSE-LABEL: trunc_mul_const_v16i32_v16i8: ; SSE: # %bb.0: ; SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3] -; SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [0,1,2,3] ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4 +; SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4 # [1,u,3,u] ; SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm4[0,2,2,3] ; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1] ; SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm1[1,1,3,3] -; SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [4,5,6,7] ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] -; SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4 +; SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4 # [5,u,7,u] ; SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm4[0,2,2,3] ; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1] ; SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm2[1,1,3,3] -; SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 +; SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [8,9,10,11] ; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3] -; SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4 +; SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4 # [9,u,11,u] ; SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm4[0,2,2,3] ; SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1] ; SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm3[1,1,3,3] -; SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 +; SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 # [12,13,14,15] ; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3] -; SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4 +; SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4 # [13,u,15,u] ; SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm4[0,2,2,3] ; SSE-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1] ; SSE-NEXT: movdqa {{.*#+}} xmm4 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0] @@ -2406,12 +2406,12 @@ define <16 x i8> @trunc_mul_const_v16i32_v16i8(<16 x i32> %a0) nounwind { ; ; AVX1-LABEL: trunc_mul_const_v16i32_v16i8: ; AVX1: # %bb.0: -; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2 +; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2 # [0,1,2,3] ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 -; 
AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm3 +; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [4,5,6,7] +; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm3 # [8,9,10,11] ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1 -; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [12,13,14,15] ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm4 = [255,255,255,255] ; AVX1-NEXT: vpand %xmm4, %xmm1, %xmm1 ; AVX1-NEXT: vpand %xmm4, %xmm3, %xmm3 @@ -2425,8 +2425,8 @@ define <16 x i8> @trunc_mul_const_v16i32_v16i8(<16 x i32> %a0) nounwind { ; ; AVX2-LABEL: trunc_mul_const_v16i32_v16i8: ; AVX2: # %bb.0: -; AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 -; AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 +; AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [0,1,2,3,4,5,6,7] +; AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 # [8,9,10,11,12,13,14,15] ; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0] ; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1 ; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0 @@ -2439,7 +2439,7 @@ define <16 x i8> @trunc_mul_const_v16i32_v16i8(<16 x i32> %a0) nounwind { ; ; AVX512-LABEL: trunc_mul_const_v16i32_v16i8: ; AVX512: # %bb.0: -; AVX512-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0 +; AVX512-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0 # [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] ; AVX512-NEXT: vpmovdb %zmm0, %xmm0 ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq diff --git a/llvm/test/CodeGen/X86/vselect-avx.ll b/llvm/test/CodeGen/X86/vselect-avx.ll index 17315c4..1c5be03 100644 --- a/llvm/test/CodeGen/X86/vselect-avx.ll +++ b/llvm/test/CodeGen/X86/vselect-avx.ll @@ -95,7 +95,7 @@ bb: define void @test3(<4 x i32> %induction30, ptr %tmp16, ptr %tmp17, <4 x i16> %tmp3, <4 x i16> %tmp12) { ; AVX1-LABEL: test3: ; AVX1: ## %bb.0: -; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ## [2863311531,2863311531,2863311531,2863311531] ; AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm3 ; AVX1-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0 diff --git a/llvm/test/CodeGen/X86/vselect-pcmp.ll b/llvm/test/CodeGen/X86/vselect-pcmp.ll index 8543e9f..16700d4 100644 --- a/llvm/test/CodeGen/X86/vselect-pcmp.ll +++ b/llvm/test/CodeGen/X86/vselect-pcmp.ll @@ -1046,7 +1046,7 @@ define <2 x i64> @blend_mask_cond_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %z define <4 x i32> @blend_mask_cond_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; AVX1-LABEL: blend_mask_cond_v4i32: ; AVX1: # %bb.0: -; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [32768,4194304,1073741824,2147483648] ; AVX1-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0 ; AVX1-NEXT: retq ; @@ -1211,9 +1211,9 @@ define <4 x i64> @blend_mask_cond_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> %z define <8 x i32> @blend_mask_cond_v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i32> %z) { ; AVX1-LABEL: blend_mask_cond_v8i32: ; AVX1: # %bb.0: -; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm3 +; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm3 # 
[2147483648,1073741824,268435456,536870912] ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 -; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [268435456,2097152,1073741824,524288] ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm3, %ymm0 ; AVX1-NEXT: vblendvps %ymm0, %ymm2, %ymm1, %ymm0 ; AVX1-NEXT: retq diff --git a/llvm/test/CodeGen/X86/x86-shrink-wrap-unwind.ll b/llvm/test/CodeGen/X86/x86-shrink-wrap-unwind.ll index b2064b1..02d4d88 100644 --- a/llvm/test/CodeGen/X86/x86-shrink-wrap-unwind.ll +++ b/llvm/test/CodeGen/X86/x86-shrink-wrap-unwind.ll @@ -181,40 +181,38 @@ define zeroext i1 @segmentedStack(ptr readonly %vk1, ptr readonly %vk2, i64 %key ; CHECK-LABEL: segmentedStack: ; CHECK: ## %bb.0: ; CHECK-NEXT: cmpq %gs:816, %rsp -; CHECK-NEXT: jbe LBB3_6 +; CHECK-NEXT: jbe LBB3_7 ; CHECK-NEXT: LBB3_1: ## %entry ; CHECK-NEXT: pushq %rax ; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: testq %rdi, %rdi -; CHECK-NEXT: sete %al -; CHECK-NEXT: testq %rsi, %rsi -; CHECK-NEXT: sete %cl -; CHECK-NEXT: orb %al, %cl ; CHECK-NEXT: movq %rdi, %rax ; CHECK-NEXT: orq %rsi, %rax ; CHECK-NEXT: sete %al -; CHECK-NEXT: testb %cl, %cl -; CHECK-NEXT: jne LBB3_4 -; CHECK-NEXT: ## %bb.2: ## %if.end4.i +; CHECK-NEXT: testq %rdi, %rdi +; CHECK-NEXT: je LBB3_5 +; CHECK-NEXT: ## %bb.2: ## %entry +; CHECK-NEXT: testq %rsi, %rsi +; CHECK-NEXT: je LBB3_5 +; CHECK-NEXT: ## %bb.3: ## %if.end4.i ; CHECK-NEXT: movq 8(%rdi), %rdx ; CHECK-NEXT: cmpq 8(%rsi), %rdx -; CHECK-NEXT: jne LBB3_5 -; CHECK-NEXT: ## %bb.3: ## %land.rhs.i.i +; CHECK-NEXT: jne LBB3_6 +; CHECK-NEXT: ## %bb.4: ## %land.rhs.i.i ; CHECK-NEXT: movq (%rsi), %rsi ; CHECK-NEXT: movq (%rdi), %rdi ; CHECK-NEXT: callq _memcmp ; CHECK-NEXT: testl %eax, %eax ; CHECK-NEXT: sete %al -; CHECK-NEXT: LBB3_4: ## %__go_ptr_strings_equal.exit +; CHECK-NEXT: LBB3_5: ## %__go_ptr_strings_equal.exit ; CHECK-NEXT: ## kill: def $al killed $al killed $eax ; CHECK-NEXT: popq %rcx ; CHECK-NEXT: retq -; CHECK-NEXT: LBB3_5: +; CHECK-NEXT: LBB3_6: ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: ## kill: def $al killed $al killed $eax ; CHECK-NEXT: popq %rcx ; CHECK-NEXT: retq -; CHECK-NEXT: LBB3_6: +; CHECK-NEXT: LBB3_7: ; CHECK-NEXT: movl $8, %r10d ; CHECK-NEXT: movl $0, %r11d ; CHECK-NEXT: callq ___morestack @@ -224,43 +222,41 @@ define zeroext i1 @segmentedStack(ptr readonly %vk1, ptr readonly %vk2, i64 %key ; NOCOMPACTUNWIND-LABEL: segmentedStack: ; NOCOMPACTUNWIND: # %bb.0: ; NOCOMPACTUNWIND-NEXT: cmpq %fs:112, %rsp -; NOCOMPACTUNWIND-NEXT: jbe .LBB3_6 +; NOCOMPACTUNWIND-NEXT: jbe .LBB3_7 ; NOCOMPACTUNWIND-NEXT: .LBB3_1: # %entry ; NOCOMPACTUNWIND-NEXT: pushq %rax ; NOCOMPACTUNWIND-NEXT: .cfi_def_cfa_offset 16 -; NOCOMPACTUNWIND-NEXT: testq %rdi, %rdi -; NOCOMPACTUNWIND-NEXT: sete %al -; NOCOMPACTUNWIND-NEXT: testq %rsi, %rsi -; NOCOMPACTUNWIND-NEXT: sete %cl -; NOCOMPACTUNWIND-NEXT: orb %al, %cl ; NOCOMPACTUNWIND-NEXT: movq %rdi, %rax ; NOCOMPACTUNWIND-NEXT: orq %rsi, %rax ; NOCOMPACTUNWIND-NEXT: sete %al -; NOCOMPACTUNWIND-NEXT: testb %cl, %cl -; NOCOMPACTUNWIND-NEXT: jne .LBB3_4 -; NOCOMPACTUNWIND-NEXT: # %bb.2: # %if.end4.i +; NOCOMPACTUNWIND-NEXT: testq %rdi, %rdi +; NOCOMPACTUNWIND-NEXT: je .LBB3_5 +; NOCOMPACTUNWIND-NEXT: # %bb.2: # %entry +; NOCOMPACTUNWIND-NEXT: testq %rsi, %rsi +; NOCOMPACTUNWIND-NEXT: je .LBB3_5 +; NOCOMPACTUNWIND-NEXT: # %bb.3: # %if.end4.i ; NOCOMPACTUNWIND-NEXT: movq 8(%rdi), %rdx ; NOCOMPACTUNWIND-NEXT: cmpq 8(%rsi), %rdx -; NOCOMPACTUNWIND-NEXT: jne .LBB3_5 -; 
NOCOMPACTUNWIND-NEXT: # %bb.3: # %land.rhs.i.i +; NOCOMPACTUNWIND-NEXT: jne .LBB3_6 +; NOCOMPACTUNWIND-NEXT: # %bb.4: # %land.rhs.i.i ; NOCOMPACTUNWIND-NEXT: movq (%rsi), %rsi ; NOCOMPACTUNWIND-NEXT: movq (%rdi), %rdi ; NOCOMPACTUNWIND-NEXT: callq memcmp@PLT ; NOCOMPACTUNWIND-NEXT: testl %eax, %eax ; NOCOMPACTUNWIND-NEXT: sete %al -; NOCOMPACTUNWIND-NEXT: .LBB3_4: # %__go_ptr_strings_equal.exit +; NOCOMPACTUNWIND-NEXT: .LBB3_5: # %__go_ptr_strings_equal.exit ; NOCOMPACTUNWIND-NEXT: # kill: def $al killed $al killed $eax ; NOCOMPACTUNWIND-NEXT: popq %rcx ; NOCOMPACTUNWIND-NEXT: .cfi_def_cfa_offset 8 ; NOCOMPACTUNWIND-NEXT: retq -; NOCOMPACTUNWIND-NEXT: .LBB3_5: +; NOCOMPACTUNWIND-NEXT: .LBB3_6: ; NOCOMPACTUNWIND-NEXT: .cfi_def_cfa_offset 16 ; NOCOMPACTUNWIND-NEXT: xorl %eax, %eax ; NOCOMPACTUNWIND-NEXT: # kill: def $al killed $al killed $eax ; NOCOMPACTUNWIND-NEXT: popq %rcx ; NOCOMPACTUNWIND-NEXT: .cfi_def_cfa_offset 8 ; NOCOMPACTUNWIND-NEXT: retq -; NOCOMPACTUNWIND-NEXT: .LBB3_6: +; NOCOMPACTUNWIND-NEXT: .LBB3_7: ; NOCOMPACTUNWIND-NEXT: movl $8, %r10d ; NOCOMPACTUNWIND-NEXT: movl $0, %r11d ; NOCOMPACTUNWIND-NEXT: callq __morestack diff --git a/llvm/test/CodeGen/X86/zero-call-used-regs-simd.ll b/llvm/test/CodeGen/X86/zero-call-used-regs-simd.ll new file mode 100644 index 0000000..d9253e0 --- /dev/null +++ b/llvm/test/CodeGen/X86/zero-call-used-regs-simd.ll @@ -0,0 +1,216 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse2 -verify-machineinstrs | FileCheck %s --check-prefixes=SSE +; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx -verify-machineinstrs | FileCheck %s --check-prefixes=AVX,AVX1 +; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2 -verify-machineinstrs | FileCheck %s --check-prefixes=AVX,AVX2 +; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512f,+avx512vl -verify-machineinstrs | FileCheck %s --check-prefixes=AVX512,AVX512VL +; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512f,+avx512vl,+avx512bw -verify-machineinstrs | FileCheck %s --check-prefixes=AVX512,AVX512BW + +define void @zero_xmm(<4 x i32> %arg) #0 { +; SSE-LABEL: zero_xmm: +; SSE: # %bb.0: +; SSE-NEXT: movaps %xmm0, 0 +; SSE-NEXT: xorps %xmm0, %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: zero_xmm: +; AVX: # %bb.0: +; AVX-NEXT: vmovaps %xmm0, 0 +; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; AVX-NEXT: retq +; +; AVX512-LABEL: zero_xmm: +; AVX512: # %bb.0: +; AVX512-NEXT: vmovaps %xmm0, 0 +; AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; AVX512-NEXT: retq + store <4 x i32> %arg, ptr null, align 32 + ret void +} + +define void @zero_ymm(<8 x i32> %arg) #0 { +; SSE-LABEL: zero_ymm: +; SSE: # %bb.0: +; SSE-NEXT: movaps %xmm1, 16 +; SSE-NEXT: movaps %xmm0, 0 +; SSE-NEXT: xorps %xmm0, %xmm0 +; SSE-NEXT: xorps %xmm1, %xmm1 +; SSE-NEXT: retq +; +; AVX-LABEL: zero_ymm: +; AVX: # %bb.0: +; AVX-NEXT: vmovaps %ymm0, 0 +; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; AVX-NEXT: vzeroupper +; AVX-NEXT: retq +; +; AVX512-LABEL: zero_ymm: +; AVX512: # %bb.0: +; AVX512-NEXT: vmovaps %ymm0, 0 +; AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: retq + store <8 x i32> %arg, ptr null, align 32 + ret void +} + +define void @zero_zmm(<16 x i32> %arg) #0 { +; SSE-LABEL: zero_zmm: +; SSE: # %bb.0: +; SSE-NEXT: movaps %xmm3, 48 +; SSE-NEXT: movaps %xmm2, 32 +; SSE-NEXT: movaps %xmm1, 16 +; SSE-NEXT: movaps %xmm0, 0 +; SSE-NEXT: xorps %xmm0, %xmm0 +; SSE-NEXT: xorps %xmm1, %xmm1 +; SSE-NEXT: xorps %xmm2, %xmm2 +; SSE-NEXT: xorps %xmm3, %xmm3 +; 
SSE-NEXT: retq +; +; AVX-LABEL: zero_zmm: +; AVX: # %bb.0: +; AVX-NEXT: vmovaps %ymm1, 32 +; AVX-NEXT: vmovaps %ymm0, 0 +; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; AVX-NEXT: vzeroupper +; AVX-NEXT: retq +; +; AVX512-LABEL: zero_zmm: +; AVX512: # %bb.0: +; AVX512-NEXT: vmovups %zmm0, 0 +; AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: retq + store <16 x i32> %arg, ptr null, align 32 + ret void +} + +define void @zero_k(<8 x i32> %arg, <8 x i1> %mask) #0 { +; SSE-LABEL: zero_k: +; SSE: # %bb.0: +; SSE-NEXT: psllw $15, %xmm2 +; SSE-NEXT: packsswb %xmm2, %xmm2 +; SSE-NEXT: pmovmskb %xmm2, %eax +; SSE-NEXT: testb $1, %al +; SSE-NEXT: jne .LBB3_1 +; SSE-NEXT: # %bb.2: # %else +; SSE-NEXT: testb $2, %al +; SSE-NEXT: jne .LBB3_3 +; SSE-NEXT: .LBB3_4: # %else2 +; SSE-NEXT: testb $4, %al +; SSE-NEXT: jne .LBB3_5 +; SSE-NEXT: .LBB3_6: # %else4 +; SSE-NEXT: testb $8, %al +; SSE-NEXT: jne .LBB3_7 +; SSE-NEXT: .LBB3_8: # %else6 +; SSE-NEXT: testb $16, %al +; SSE-NEXT: jne .LBB3_9 +; SSE-NEXT: .LBB3_10: # %else8 +; SSE-NEXT: testb $32, %al +; SSE-NEXT: jne .LBB3_11 +; SSE-NEXT: .LBB3_12: # %else10 +; SSE-NEXT: testb $64, %al +; SSE-NEXT: jne .LBB3_13 +; SSE-NEXT: .LBB3_14: # %else12 +; SSE-NEXT: testb $-128, %al +; SSE-NEXT: je .LBB3_16 +; SSE-NEXT: .LBB3_15: # %cond.store13 +; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[3,3,3,3] +; SSE-NEXT: movd %xmm0, 28 +; SSE-NEXT: .LBB3_16: # %else14 +; SSE-NEXT: xorl %eax, %eax +; SSE-NEXT: pxor %xmm0, %xmm0 +; SSE-NEXT: pxor %xmm1, %xmm1 +; SSE-NEXT: pxor %xmm2, %xmm2 +; SSE-NEXT: retq +; SSE-NEXT: .LBB3_1: # %cond.store +; SSE-NEXT: movd %xmm0, 0 +; SSE-NEXT: testb $2, %al +; SSE-NEXT: je .LBB3_4 +; SSE-NEXT: .LBB3_3: # %cond.store1 +; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,1,1] +; SSE-NEXT: movd %xmm2, 4 +; SSE-NEXT: testb $4, %al +; SSE-NEXT: je .LBB3_6 +; SSE-NEXT: .LBB3_5: # %cond.store3 +; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] +; SSE-NEXT: movd %xmm2, 8 +; SSE-NEXT: testb $8, %al +; SSE-NEXT: je .LBB3_8 +; SSE-NEXT: .LBB3_7: # %cond.store5 +; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3] +; SSE-NEXT: movd %xmm0, 12 +; SSE-NEXT: testb $16, %al +; SSE-NEXT: je .LBB3_10 +; SSE-NEXT: .LBB3_9: # %cond.store7 +; SSE-NEXT: movd %xmm1, 16 +; SSE-NEXT: testb $32, %al +; SSE-NEXT: je .LBB3_12 +; SSE-NEXT: .LBB3_11: # %cond.store9 +; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] +; SSE-NEXT: movd %xmm0, 20 +; SSE-NEXT: testb $64, %al +; SSE-NEXT: je .LBB3_14 +; SSE-NEXT: .LBB3_13: # %cond.store11 +; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3] +; SSE-NEXT: movd %xmm0, 24 +; SSE-NEXT: testb $-128, %al +; SSE-NEXT: jne .LBB3_15 +; SSE-NEXT: jmp .LBB3_16 +; +; AVX1-LABEL: zero_k: +; AVX1: # %bb.0: +; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero +; AVX1-NEXT: vpslld $31, %xmm2, %xmm2 +; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7] +; AVX1-NEXT: vpslld $31, %xmm1, %xmm1 +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1 +; AVX1-NEXT: vmaskmovps %ymm0, %ymm1, 0 +; AVX1-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX1-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX1-NEXT: vzeroupper +; AVX1-NEXT: retq +; +; AVX2-LABEL: zero_k: +; AVX2: # %bb.0: +; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero +; 
AVX2-NEXT: vpslld $31, %ymm1, %ymm1 +; AVX2-NEXT: vpmaskmovd %ymm0, %ymm1, 0 +; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX2-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX2-NEXT: vzeroupper +; AVX2-NEXT: retq +; +; AVX512VL-LABEL: zero_k: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vpmovsxwd %xmm1, %ymm1 +; AVX512VL-NEXT: vpslld $31, %ymm1, %ymm1 +; AVX512VL-NEXT: vptestmd %ymm1, %ymm1, %k1 +; AVX512VL-NEXT: vmovdqa32 %ymm0, 0 {%k1} +; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX512VL-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX512VL-NEXT: kxorw %k0, %k0, %k1 +; AVX512VL-NEXT: vzeroupper +; AVX512VL-NEXT: retq +; +; AVX512BW-LABEL: zero_k: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: vpsllw $15, %xmm1, %xmm1 +; AVX512BW-NEXT: vpmovw2m %xmm1, %k1 +; AVX512BW-NEXT: vmovdqa32 %ymm0, 0 {%k1} +; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX512BW-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; AVX512BW-NEXT: kxorq %k0, %k0, %k1 +; AVX512BW-NEXT: vzeroupper +; AVX512BW-NEXT: retq + tail call void @llvm.masked.store.v8i32.p0(<8 x i32> %arg, ptr null, i32 32, <8 x i1> %mask) + ret void +} + +attributes #0 = { "zero-call-used-regs"="used" } |
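Note on the new zero-call-used-regs-simd.ll test above: it exercises the `"zero-call-used-regs"="used"` function attribute, under which codegen zeroes every call-used (caller-saved) register the function actually touched before returning — including the SSE/AVX vector registers and, on AVX-512 targets, the `%k` mask registers, as the CHECK lines show. A minimal stand-alone sketch of the same attribute (hypothetical function and file names, not taken from the test):

; zero-used-regs-example.ll
; Run with: llc -mtriple=x86_64-- -mattr=+avx2 zero-used-regs-example.ll
; The store makes the vector argument register (and the pointer register)
; "used", so the backend should append zeroing instructions, e.g.
; vxorps %xmm0, %xmm0, %xmm0, before retq.
define void @store_vec(<4 x i32> %v, ptr %p) "zero-call-used-regs"="used" {
  store <4 x i32> %v, ptr %p, align 16
  ret void
}

From C, the same behavior can be requested per function with Clang/GCC's `__attribute__((zero_call_used_regs("used")))`, or for a whole translation unit with `-fzero-call-used-regs=used`.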