; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc < %s -mtriple=i686-- -mattr=+mmx,+ssse3,-avx | FileCheck %s --check-prefixes=ALL,X86
; RUN: llc < %s -mtriple=i686-- -mattr=+mmx,+avx | FileCheck %s --check-prefixes=ALL,X86
; RUN: llc < %s -mtriple=x86_64-- -mattr=+mmx,+ssse3,-avx | FileCheck %s --check-prefixes=ALL,X64
; RUN: llc < %s -mtriple=x86_64-- -mattr=+mmx,+avx | FileCheck %s --check-prefixes=ALL,X64

declare <1 x i64> @llvm.x86.ssse3.phadd.w(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test1(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test1:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $24, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 16(%ebp), %eax
; X86-NEXT:    movl 20(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    phaddw {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test1:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rsi, %mm0
; X64-NEXT:    movq %rdi, %mm1
; X64-NEXT:    phaddw %mm0, %mm1
; X64-NEXT:    movq %mm1, %rax
; X64-NEXT:    retq
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %2 = bitcast <4 x i16> %1 to <1 x i64>
  %3 = bitcast <4 x i16> %0 to <1 x i64>
  %4 = tail call <1 x i64> @llvm.x86.ssse3.phadd.w(<1 x i64> %2, <1 x i64> %3) nounwind readnone
  %5 = bitcast <1 x i64> %4 to <4 x i16>
  %6 = bitcast <4 x i16> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}

declare <1 x i64> @llvm.x86.mmx.pcmpgt.d(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test88(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test88:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $24, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 16(%ebp), %eax
; X86-NEXT:    movl 20(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    pcmpgtd {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test88:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rsi, %mm0
; X64-NEXT:    movq %rdi, %mm1
; X64-NEXT:    pcmpgtd %mm0, %mm1
; X64-NEXT:    movq %mm1, %rax
; X64-NEXT:    retq
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to <1 x i64>
  %mmx_var1.i = bitcast <2 x i32> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.pcmpgt.d(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare <1 x i64> @llvm.x86.mmx.pcmpgt.w(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test87(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test87:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $24, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 16(%ebp), %eax
; X86-NEXT:    movl 20(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    pcmpgtw {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test87:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rsi, %mm0
; X64-NEXT:    movq %rdi, %mm1
; X64-NEXT:    pcmpgtw %mm0, %mm1
; X64-NEXT:    movq %mm1, %rax
; X64-NEXT:    retq
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64>
  %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.pcmpgt.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare <1 x i64> @llvm.x86.mmx.pcmpgt.b(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test86(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test86:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $24, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 16(%ebp), %eax
; X86-NEXT:    movl 20(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    pcmpgtb {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test86:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rsi, %mm0
; X64-NEXT:    movq %rdi, %mm1
; X64-NEXT:    pcmpgtb %mm0, %mm1
; X64-NEXT:    movq %mm1, %rax
; X64-NEXT:    retq
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to <1 x i64>
  %mmx_var1.i = bitcast <8 x i8> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.pcmpgt.b(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare <1 x i64> @llvm.x86.mmx.pcmpeq.d(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test85(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test85:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $24, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 16(%ebp), %eax
; X86-NEXT:    movl 20(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    pcmpeqd {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test85:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rsi, %mm0
; X64-NEXT:    movq %rdi, %mm1
; X64-NEXT:    pcmpeqd %mm0, %mm1
; X64-NEXT:    movq %mm1, %rax
; X64-NEXT:    retq
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to <1 x i64>
  %mmx_var1.i = bitcast <2 x i32> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.pcmpeq.d(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare <1 x i64> @llvm.x86.mmx.pcmpeq.w(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test84(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test84:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $24, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 16(%ebp), %eax
; X86-NEXT:    movl 20(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    pcmpeqw {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test84:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rsi, %mm0
; X64-NEXT:    movq %rdi, %mm1
; X64-NEXT:    pcmpeqw %mm0, %mm1
; X64-NEXT:    movq %mm1, %rax
; X64-NEXT:    retq
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64>
  %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.pcmpeq.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare <1 x i64> @llvm.x86.mmx.pcmpeq.b(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test83(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test83:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $24, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 16(%ebp), %eax
; X86-NEXT:    movl 20(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    pcmpeqb {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test83:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rsi, %mm0
; X64-NEXT:    movq %rdi, %mm1
; X64-NEXT:    pcmpeqb %mm0, %mm1
; X64-NEXT:    movq %mm1, %rax
; X64-NEXT:    retq
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to <1 x i64>
  %mmx_var1.i = bitcast <8 x i8> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.pcmpeq.b(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare <1 x i64> @llvm.x86.mmx.punpckldq(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test82(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test82:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $24, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 16(%ebp), %eax
; X86-NEXT:    movl 20(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    punpckldq {{[0-9]+}}(%esp), %mm0 # mm0 = mm0[0],mem[0]
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test82:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rsi, %mm0
; X64-NEXT:    movq %rdi, %mm1
; X64-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X64-NEXT:    movq %mm1, %rax
; X64-NEXT:    retq
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to <1 x i64>
  %mmx_var1.i = bitcast <2 x i32> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.punpckldq(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare <1 x i64> @llvm.x86.mmx.punpcklwd(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test81(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test81:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $24, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 16(%ebp), %eax
; X86-NEXT:    movl 20(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    punpcklwd {{[0-9]+}}(%esp), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1]
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test81:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rsi, %mm0
; X64-NEXT:    movq %rdi, %mm1
; X64-NEXT:    punpcklwd %mm0, %mm1 # mm1 = mm1[0],mm0[0],mm1[1],mm0[1]
; X64-NEXT:    movq %mm1, %rax
; X64-NEXT:    retq
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64>
  %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.punpcklwd(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare <1 x i64> @llvm.x86.mmx.punpcklbw(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test80(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test80:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $24, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 16(%ebp), %eax
; X86-NEXT:    movl 20(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    punpcklbw {{[0-9]+}}(%esp), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3]
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test80:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rsi, %mm0
; X64-NEXT:    movq %rdi, %mm1
; X64-NEXT:    punpcklbw %mm0, %mm1 # mm1 = mm1[0],mm0[0],mm1[1],mm0[1],mm1[2],mm0[2],mm1[3],mm0[3]
; X64-NEXT:    movq %mm1, %rax
; X64-NEXT:    retq
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to <1 x i64>
  %mmx_var1.i = bitcast <8 x i8> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.punpcklbw(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare <1 x i64> @llvm.x86.mmx.punpckhdq(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test79(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test79:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $24, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 16(%ebp), %eax
; X86-NEXT:    movl 20(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    punpckhdq {{[0-9]+}}(%esp), %mm0 # mm0 = mm0[1],mem[1]
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test79:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rsi, %mm0
; X64-NEXT:    movq %rdi, %mm1
; X64-NEXT:    punpckhdq %mm0, %mm1 # mm1 = mm1[1],mm0[1]
; X64-NEXT:    movq %mm1, %rax
; X64-NEXT:    retq
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to <1 x i64>
  %mmx_var1.i = bitcast <2 x i32> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.punpckhdq(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare <1 x i64> @llvm.x86.mmx.punpckhwd(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test78(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test78:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $24, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 16(%ebp), %eax
; X86-NEXT:    movl 20(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    punpckhwd {{[0-9]+}}(%esp), %mm0 # mm0 = mm0[2],mem[2],mm0[3],mem[3]
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test78:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rsi, %mm0
; X64-NEXT:    movq %rdi, %mm1
; X64-NEXT:    punpckhwd %mm0, %mm1 # mm1 = mm1[2],mm0[2],mm1[3],mm0[3]
; X64-NEXT:    movq %mm1, %rax
; X64-NEXT:    retq
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64>
  %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.punpckhwd(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare <1 x i64> @llvm.x86.mmx.punpckhbw(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test77(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test77:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $24, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 16(%ebp), %eax
; X86-NEXT:    movl 20(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    punpckhbw {{[0-9]+}}(%esp), %mm0 # mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7]
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test77:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rsi, %mm0
; X64-NEXT:    movq %rdi, %mm1
; X64-NEXT:    punpckhbw %mm0, %mm1 # mm1 = mm1[4],mm0[4],mm1[5],mm0[5],mm1[6],mm0[6],mm1[7],mm0[7]
; X64-NEXT:    movq %mm1, %rax
; X64-NEXT:    retq
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to <1 x i64>
  %mmx_var1.i = bitcast <8 x i8> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.punpckhbw(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare <1 x i64> @llvm.x86.mmx.packuswb(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test76(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test76:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $24, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 16(%ebp), %eax
; X86-NEXT:    movl 20(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    packuswb {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test76:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rsi, %mm0
; X64-NEXT:    movq %rdi, %mm1
; X64-NEXT:    packuswb %mm0, %mm1
; X64-NEXT:    movq %mm1, %rax
; X64-NEXT:    retq
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64>
  %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.packuswb(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare <1 x i64> @llvm.x86.mmx.packssdw(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test75(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test75:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $24, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 16(%ebp), %eax
; X86-NEXT:    movl 20(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    packssdw {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test75:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rsi, %mm0
; X64-NEXT:    movq %rdi, %mm1
; X64-NEXT:    packssdw %mm0, %mm1
; X64-NEXT:    movq %mm1, %rax
; X64-NEXT:    retq
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to <1 x i64>
  %mmx_var1.i = bitcast <2 x i32> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.packssdw(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare <1 x i64> @llvm.x86.mmx.packsswb(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test74(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test74:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $24, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 16(%ebp), %eax
; X86-NEXT:    movl 20(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    packsswb {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test74:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rsi, %mm0
; X64-NEXT:    movq %rdi, %mm1
; X64-NEXT:    packsswb %mm0, %mm1
; X64-NEXT:    movq %mm1, %rax
; X64-NEXT:    retq
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64>
  %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.packsswb(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare <1 x i64> @llvm.x86.mmx.psrai.d(<1 x i64>, i32) nounwind readnone

define i64 @test73(<1 x i64> %a) nounwind readnone optsize ssp {
; X86-LABEL: test73:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $16, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    psrad $3, %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test73:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %mm0
; X64-NEXT:    psrad $3, %mm0
; X64-NEXT:    movq %mm0, %rax
; X64-NEXT:    retq
entry:
  %0 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %0 to <1 x i64>
  %1 = tail call <1 x i64> @llvm.x86.mmx.psrai.d(<1 x i64> %mmx_var.i, i32 3) nounwind
  %2 = bitcast <1 x i64> %1 to <2 x i32>
  %3 = bitcast <2 x i32> %2 to <1 x i64>
  %4 = extractelement <1 x i64> %3, i32 0
  ret i64 %4
}

declare <1 x i64> @llvm.x86.mmx.psrai.w(<1 x i64>, i32) nounwind readnone

define i64 @test72(<1 x i64> %a) nounwind readnone optsize ssp {
; X86-LABEL: test72:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $16, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    psraw $3, %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test72:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %mm0
; X64-NEXT:    psraw $3, %mm0
; X64-NEXT:    movq %mm0, %rax
; X64-NEXT:    retq
entry:
  %0 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %0 to <1 x i64>
  %1 = tail call <1 x i64> @llvm.x86.mmx.psrai.w(<1 x i64> %mmx_var.i, i32 3) nounwind
  %2 = bitcast <1 x i64> %1 to <4 x i16>
  %3 = bitcast <4 x i16> %2 to <1 x i64>
  %4 = extractelement <1 x i64> %3, i32 0
  ret i64 %4
}

define i64 @test72_2(<1 x i64> %a) nounwind readnone optsize ssp {
; X86-LABEL: test72_2:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $16, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test72_2:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %rax
; X64-NEXT:    retq
entry:
  %0 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %0 to <1 x i64>
  %1 = tail call <1 x i64> @llvm.x86.mmx.psrai.w(<1 x i64> %mmx_var.i, i32 0) nounwind
  %2 = bitcast <1 x i64> %1 to <4 x i16>
  %3 = bitcast <4 x i16> %2 to <1 x i64>
  %4 = extractelement <1 x i64> %3, i32 0
  ret i64 %4
}

declare <1 x i64> @llvm.x86.mmx.psrli.q(<1 x i64>, i32) nounwind readnone

define i64 @test71(<1 x i64> %a) nounwind readnone optsize ssp {
; X86-LABEL: test71:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movq 8(%ebp), %mm0
; X86-NEXT:    psrlq $3, %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test71:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %mm0
; X64-NEXT:    psrlq $3, %mm0
; X64-NEXT:    movq %mm0, %rax
; X64-NEXT:    retq
entry:
  %0 = extractelement <1 x i64> %a, i32 0
  %mmx_var.i = bitcast i64 %0 to <1 x i64>
  %1 = tail call <1 x i64> @llvm.x86.mmx.psrli.q(<1 x i64> %mmx_var.i, i32 3) nounwind
  %2 = bitcast <1 x i64> %1 to i64
  ret i64 %2
}

declare <1 x i64> @llvm.x86.mmx.psrli.d(<1 x i64>, i32) nounwind readnone

define i64 @test70(<1 x i64> %a) nounwind readnone optsize ssp {
; X86-LABEL: test70:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $16, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    psrld $3, %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test70:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %mm0
; X64-NEXT:    psrld $3, %mm0
; X64-NEXT:    movq %mm0, %rax
; X64-NEXT:    retq
entry:
  %0 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %0 to <1 x i64>
  %1 = tail call <1 x i64> @llvm.x86.mmx.psrli.d(<1 x i64> %mmx_var.i, i32 3) nounwind
  %2 = bitcast <1 x i64> %1 to <2 x i32>
  %3 = bitcast <2 x i32> %2 to <1 x i64>
  %4 = extractelement <1 x i64> %3, i32 0
  ret i64 %4
}

define i64 @test70_2(<1 x i64> %a) nounwind readnone optsize ssp {
; X86-LABEL: test70_2:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $16, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test70_2:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %rax
; X64-NEXT:    retq
entry:
  %0 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %0 to <1 x i64>
  %1 = tail call <1 x i64> @llvm.x86.mmx.psrli.d(<1 x i64> %mmx_var.i, i32 0) nounwind
  %2 = bitcast <1 x i64> %1 to <2 x i32>
  %3 = bitcast <2 x i32> %2 to <1 x i64>
  %4 = extractelement <1 x i64> %3, i32 0
  ret i64 %4
}

declare <1 x i64> @llvm.x86.mmx.psrli.w(<1 x i64>, i32) nounwind readnone

define i64 @test69(<1 x i64> %a) nounwind readnone optsize ssp {
; X86-LABEL: test69:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $16, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    psrlw $3, %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test69:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %mm0
; X64-NEXT:    psrlw $3, %mm0
; X64-NEXT:    movq %mm0, %rax
; X64-NEXT:    retq
entry:
  %0 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %0 to <1 x i64>
  %1 = tail call <1 x i64> @llvm.x86.mmx.psrli.w(<1 x i64> %mmx_var.i, i32 3) nounwind
  %2 = bitcast <1 x i64> %1 to <4 x i16>
  %3 = bitcast <4 x i16> %2 to <1 x i64>
  %4 = extractelement <1 x i64> %3, i32 0
  ret i64 %4
}

declare <1 x i64> @llvm.x86.mmx.pslli.q(<1 x i64>, i32) nounwind readnone

define i64 @test68(<1 x i64> %a) nounwind readnone optsize ssp {
; X86-LABEL: test68:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movq 8(%ebp), %mm0
; X86-NEXT:    psllq $3, %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test68:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %mm0
; X64-NEXT:    psllq $3, %mm0
; X64-NEXT:    movq %mm0, %rax
; X64-NEXT:    retq
entry:
  %0 = extractelement <1 x i64> %a, i32 0
  %mmx_var.i = bitcast i64 %0 to <1 x i64>
  %1 = tail call <1 x i64> @llvm.x86.mmx.pslli.q(<1 x i64> %mmx_var.i, i32 3) nounwind
  %2 = bitcast <1 x i64> %1 to i64
  ret i64 %2
}

declare <1 x i64> @llvm.x86.mmx.pslli.d(<1 x i64>, i32) nounwind readnone

define i64 @test67(<1 x i64> %a) nounwind readnone optsize ssp {
; X86-LABEL: test67:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $16, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    pslld $3, %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test67:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %mm0
; X64-NEXT:    pslld $3, %mm0
; X64-NEXT:    movq %mm0, %rax
; X64-NEXT:    retq
entry:
  %0 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %0 to <1 x i64>
  %1 = tail call <1 x i64> @llvm.x86.mmx.pslli.d(<1 x i64> %mmx_var.i, i32 3) nounwind
  %2 = bitcast <1 x i64> %1 to <2 x i32>
  %3 = bitcast <2 x i32> %2 to <1 x i64>
  %4 = extractelement <1 x i64> %3, i32 0
  ret i64 %4
}

declare <1 x i64> @llvm.x86.mmx.pslli.w(<1 x i64>, i32) nounwind readnone

define i64 @test66(<1 x i64> %a) nounwind readnone optsize ssp {
; X86-LABEL: test66:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $16, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    psllw $3, %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test66:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %mm0
; X64-NEXT:    psllw $3, %mm0
; X64-NEXT:    movq %mm0, %rax
; X64-NEXT:    retq
entry:
  %0 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %0 to <1 x i64>
  %1 = tail call <1 x i64> @llvm.x86.mmx.pslli.w(<1 x i64> %mmx_var.i, i32 3) nounwind
  %2 = bitcast <1 x i64> %1 to <4 x i16>
  %3 = bitcast <4 x i16> %2 to <1 x i64>
  %4 = extractelement <1 x i64> %3, i32 0
  ret i64 %4
}

define i64 @test66_2(<1 x i64> %a) nounwind readnone optsize ssp {
; X86-LABEL: test66_2:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $16, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test66_2:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %rax
; X64-NEXT:    retq
entry:
  %0 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %0 to <1 x i64>
  %1 = tail call <1 x i64> @llvm.x86.mmx.pslli.w(<1 x i64> %mmx_var.i, i32 0) nounwind
  %2 = bitcast <1 x i64> %1 to <4 x i16>
  %3 = bitcast <4 x i16> %2 to <1 x i64>
  %4 = extractelement <1 x i64> %3, i32 0
  ret i64 %4
}

declare <1 x i64> @llvm.x86.mmx.psra.d(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test65(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test65:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $16, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    psrad 16(%ebp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test65:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %mm0
; X64-NEXT:    movq %rsi, %mm1
; X64-NEXT:    psrad %mm1, %mm0
; X64-NEXT:    movq %mm0, %rax
; X64-NEXT:    retq
entry:
  %0 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %0 to <1 x i64>
  %1 = extractelement <1 x i64> %b, i32 0
  %mmx_var1.i = bitcast i64 %1 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.psra.d(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare <1 x i64> @llvm.x86.mmx.psra.w(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test64(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $16, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    psraw 16(%ebp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %mm0
; X64-NEXT:    movq %rsi, %mm1
; X64-NEXT:    psraw %mm1, %mm0
; X64-NEXT:    movq %mm0, %rax
; X64-NEXT:    retq
entry:
  %0 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %0 to <1 x i64>
  %1 = extractelement <1 x i64> %b, i32 0
  %mmx_var1.i = bitcast i64 %1 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.psra.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare <1 x i64> @llvm.x86.mmx.psrl.q(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test63(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test63:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movq 8(%ebp), %mm0
; X86-NEXT:    psrlq 16(%ebp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test63:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %mm0
; X64-NEXT:    movq %rsi, %mm1
; X64-NEXT:    psrlq %mm1, %mm0
; X64-NEXT:    movq %mm0, %rax
; X64-NEXT:    retq
entry:
  %0 = extractelement <1 x i64> %a, i32 0
  %mmx_var.i = bitcast i64 %0 to <1 x i64>
  %1 = extractelement <1 x i64> %b, i32 0
  %mmx_var1.i = bitcast i64 %1 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.psrl.q(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to i64
  ret i64 %3
}

declare <1 x i64> @llvm.x86.mmx.psrl.d(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test62(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test62:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $16, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    psrld 16(%ebp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test62:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %mm0
; X64-NEXT:    movq %rsi, %mm1
; X64-NEXT:    psrld %mm1, %mm0
; X64-NEXT:    movq %mm0, %rax
; X64-NEXT:    retq
entry:
  %0 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %0 to <1 x i64>
  %1 = extractelement <1 x i64> %b, i32 0
  %mmx_var1.i = bitcast i64 %1 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.psrl.d(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare <1 x i64> @llvm.x86.mmx.psrl.w(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test61(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test61:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $16, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    psrlw 16(%ebp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test61:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %mm0
; X64-NEXT:    movq %rsi, %mm1
; X64-NEXT:    psrlw %mm1, %mm0
; X64-NEXT:    movq %mm0, %rax
; X64-NEXT:    retq
entry:
  %0 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %0 to <1 x i64>
  %1 = extractelement <1 x i64> %b, i32 0
  %mmx_var1.i = bitcast i64 %1 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.psrl.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare <1 x i64> @llvm.x86.mmx.psll.q(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test60(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test60:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movq 8(%ebp), %mm0
; X86-NEXT:    psllq 16(%ebp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test60:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %mm0
; X64-NEXT:    movq %rsi, %mm1
; X64-NEXT:    psllq %mm1, %mm0
; X64-NEXT:    movq %mm0, %rax
; X64-NEXT:    retq
entry:
  %0 = extractelement <1 x i64> %a, i32 0
  %mmx_var.i = bitcast i64 %0 to <1 x i64>
  %1 = extractelement <1 x i64> %b, i32 0
  %mmx_var1.i = bitcast i64 %1 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.psll.q(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to i64
  ret i64 %3
}

declare <1 x i64> @llvm.x86.mmx.psll.d(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test59(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test59:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $16, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    pslld 16(%ebp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test59:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %mm0
; X64-NEXT:    movq %rsi, %mm1
; X64-NEXT:    pslld %mm1, %mm0
; X64-NEXT:    movq %mm0, %rax
; X64-NEXT:    retq
entry:
  %0 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %0 to <1 x i64>
  %1 = extractelement <1 x i64> %b, i32 0
  %mmx_var1.i = bitcast i64 %1 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.psll.d(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare <1 x i64> @llvm.x86.mmx.psll.w(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test58(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test58:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $16, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    psllw 16(%ebp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test58:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %mm0
; X64-NEXT:    movq %rsi, %mm1
; X64-NEXT:    psllw %mm1, %mm0
; X64-NEXT:    movq %mm0, %rax
; X64-NEXT:    retq
entry:
  %0 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %0 to <1 x i64>
  %1 = extractelement <1 x i64> %b, i32 0
  %mmx_var1.i = bitcast i64 %1 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.psll.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare <1 x i64> @llvm.x86.mmx.pxor(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test56(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test56:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $24, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 16(%ebp), %eax
; X86-NEXT:    movl 20(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    pxor {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test56:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rsi, %mm0
; X64-NEXT:    movq %rdi, %mm1
; X64-NEXT:    pxor %mm0, %mm1
; X64-NEXT:    movq %mm1, %rax
; X64-NEXT:    retq
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to <1 x i64>
  %mmx_var1.i = bitcast <2 x i32> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.pxor(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare <1 x i64> @llvm.x86.mmx.por(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test55(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test55:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $24, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 16(%ebp), %eax
; X86-NEXT:    movl 20(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    por {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test55:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rsi, %mm0
; X64-NEXT:    movq %rdi, %mm1
; X64-NEXT:    por %mm0, %mm1
; X64-NEXT:    movq %mm1, %rax
; X64-NEXT:    retq
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to <1 x i64>
  %mmx_var1.i = bitcast <2 x i32> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.por(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare <1 x i64> @llvm.x86.mmx.pandn(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test54(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test54:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $24, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 16(%ebp), %eax
; X86-NEXT:    movl 20(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    pandn {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test54:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rsi, %mm0
; X64-NEXT:    movq %rdi, %mm1
; X64-NEXT:    pandn %mm0, %mm1
; X64-NEXT:    movq %mm1, %rax
; X64-NEXT:    retq
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to <1 x i64>
  %mmx_var1.i = bitcast <2 x i32> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.pandn(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare <1 x i64> @llvm.x86.mmx.pand(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test53(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test53:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $24, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 16(%ebp), %eax
; X86-NEXT:    movl 20(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    pand {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test53:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rsi, %mm0
; X64-NEXT:    movq %rdi, %mm1
; X64-NEXT:    pand %mm0, %mm1
; X64-NEXT:    movq %mm1, %rax
; X64-NEXT:    retq
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to <1 x i64>
  %mmx_var1.i = bitcast <2 x i32> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.pand(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare <1 x i64> @llvm.x86.mmx.pmull.w(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test52(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test52:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $24, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 16(%ebp), %eax
; X86-NEXT:    movl 20(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    pmullw {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test52:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rsi, %mm0
; X64-NEXT:    movq %rdi, %mm1
; X64-NEXT:    pmullw %mm0, %mm1
; X64-NEXT:    movq %mm1, %rax
; X64-NEXT:    retq
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64>
  %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.pmull.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

define i64 @test51(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test51:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $24, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 16(%ebp), %eax
; X86-NEXT:    movl 20(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    pmullw {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test51:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rsi, %mm0
; X64-NEXT:    movq %rdi, %mm1
; X64-NEXT:    pmullw %mm0, %mm1
; X64-NEXT:    movq %mm1, %rax
; X64-NEXT:    retq
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64>
  %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.pmull.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare <1 x i64> @llvm.x86.mmx.pmulh.w(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test50(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test50:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $24, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 16(%ebp), %eax
; X86-NEXT:    movl 20(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    pmulhw {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test50:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rsi, %mm0
; X64-NEXT:    movq %rdi, %mm1
; X64-NEXT:    pmulhw %mm0, %mm1
; X64-NEXT:    movq %mm1, %rax
; X64-NEXT:    retq
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64>
  %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.pmulh.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare <1 x i64> @llvm.x86.mmx.pmadd.wd(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test49(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test49:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $24, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 16(%ebp), %eax
; X86-NEXT:    movl 20(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    pmaddwd {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test49:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rsi, %mm0
; X64-NEXT:    movq %rdi, %mm1
; X64-NEXT:    pmaddwd %mm0, %mm1
; X64-NEXT:    movq %mm1, %rax
; X64-NEXT:    retq
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64>
  %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.pmadd.wd(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare <1 x i64> @llvm.x86.mmx.psubus.w(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test48(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test48:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $24, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 16(%ebp), %eax
; X86-NEXT:    movl 20(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    psubusw {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test48:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rsi, %mm0
; X64-NEXT:    movq %rdi, %mm1
; X64-NEXT:    psubusw %mm0, %mm1
; X64-NEXT:    movq %mm1, %rax
; X64-NEXT:    retq
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64>
  %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.psubus.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare <1 x i64> @llvm.x86.mmx.psubus.b(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test47(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test47:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $24, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 16(%ebp), %eax
; X86-NEXT:    movl 20(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    psubusb {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test47:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rsi, %mm0
; X64-NEXT:    movq %rdi, %mm1
; X64-NEXT:    psubusb %mm0, %mm1
; X64-NEXT:    movq %mm1, %rax
; X64-NEXT:    retq
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to <1 x i64>
  %mmx_var1.i = bitcast <8 x i8> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.psubus.b(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare <1 x i64> @llvm.x86.mmx.psubs.w(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test46(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test46:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $24, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 16(%ebp), %eax
; X86-NEXT:    movl 20(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    psubsw {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test46:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rsi, %mm0
; X64-NEXT:    movq %rdi, %mm1
; X64-NEXT:    psubsw %mm0, %mm1
; X64-NEXT:    movq %mm1, %rax
; X64-NEXT:    retq
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64>
  %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.psubs.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare <1 x i64> @llvm.x86.mmx.psubs.b(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test45(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test45:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $24, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 16(%ebp), %eax
; X86-NEXT:    movl 20(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    psubsb {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test45:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rsi, %mm0
; X64-NEXT:    movq %rdi, %mm1
; X64-NEXT:    psubsb %mm0, %mm1
; X64-NEXT:    movq %mm1, %rax
; X64-NEXT:    retq
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to <1 x i64>
  %mmx_var1.i = bitcast <8 x i8> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.psubs.b(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

define i64 @test44(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test44:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movq 8(%ebp), %mm0
; X86-NEXT:    psubq 16(%ebp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test44:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %mm0
; X64-NEXT:    movq %rsi, %mm1
; X64-NEXT:    psubq %mm1, %mm0
; X64-NEXT:    movq %mm0, %rax
; X64-NEXT:    retq
entry:
  %0 = extractelement <1 x i64> %a, i32 0
  %mmx_var = bitcast i64 %0 to <1 x i64>
  %1 = extractelement <1 x i64> %b, i32 0
  %mmx_var1 = bitcast i64 %1 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.psub.q(<1 x i64> %mmx_var, <1 x i64> %mmx_var1)
  %3 = bitcast <1 x i64> %2 to i64
  ret i64 %3
}

declare <1 x i64> @llvm.x86.mmx.psub.q(<1 x i64>, <1 x i64>) nounwind readnone

declare <1 x i64> @llvm.x86.mmx.psub.d(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test43(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test43:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $24, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 16(%ebp), %eax
; X86-NEXT:    movl 20(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    psubd {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test43:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rsi, %mm0
; X64-NEXT:    movq %rdi, %mm1
; X64-NEXT:    psubd %mm0, %mm1
; X64-NEXT:    movq %mm1, %rax
; X64-NEXT:    retq
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to <1 x i64>
  %mmx_var1.i = bitcast <2 x i32> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.psub.d(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare <1 x i64> @llvm.x86.mmx.psub.w(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test42(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test42:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $24, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 16(%ebp), %eax
; X86-NEXT:    movl 20(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    psubw {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test42:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rsi, %mm0
; X64-NEXT:    movq %rdi, %mm1
; X64-NEXT:    psubw %mm0, %mm1
; X64-NEXT:    movq %mm1, %rax
; X64-NEXT:    retq
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64>
  %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.psub.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare <1 x i64> @llvm.x86.mmx.psub.b(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test41(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test41:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $24, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 16(%ebp), %eax
; X86-NEXT:    movl 20(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    psubb {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test41:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rsi, %mm0
; X64-NEXT:    movq %rdi, %mm1
; X64-NEXT:    psubb %mm0, %mm1
; X64-NEXT:    movq %mm1, %rax
; X64-NEXT:    retq
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to <1 x i64>
  %mmx_var1.i = bitcast <8 x i8> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.psub.b(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare <1 x i64> @llvm.x86.mmx.paddus.w(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test40(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test40:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $24, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 16(%ebp), %eax
; X86-NEXT:    movl 20(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    paddusw {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test40:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rsi, %mm0
; X64-NEXT:    movq %rdi, %mm1
; X64-NEXT:    paddusw %mm0, %mm1
; X64-NEXT:    movq %mm1, %rax
; X64-NEXT:    retq
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64>
  %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.paddus.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare <1 x i64> @llvm.x86.mmx.paddus.b(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test39(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test39:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $24, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 16(%ebp), %eax
; X86-NEXT:    movl 20(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    paddusb {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test39:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rsi, %mm0
; X64-NEXT:    movq %rdi, %mm1
; X64-NEXT:    paddusb %mm0, %mm1
; X64-NEXT:    movq %mm1, %rax
; X64-NEXT:    retq
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to <1 x i64>
  %mmx_var1.i = bitcast <8 x i8> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.paddus.b(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare <1 x i64> @llvm.x86.mmx.padds.w(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test38(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test38:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $24, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 16(%ebp), %eax
; X86-NEXT:    movl 20(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    paddsw {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test38:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rsi, %mm0
; X64-NEXT:    movq %rdi, %mm1
; X64-NEXT:    paddsw %mm0, %mm1
; X64-NEXT:    movq %mm1, %rax
; X64-NEXT:    retq
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64>
  %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.padds.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare <1 x i64> @llvm.x86.mmx.padds.b(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test37(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test37:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $24, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 16(%ebp), %eax
; X86-NEXT:    movl 20(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    paddsb {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test37:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rsi, %mm0
; X64-NEXT:    movq %rdi, %mm1
; X64-NEXT:    paddsb %mm0, %mm1
; X64-NEXT:    movq %mm1, %rax
; X64-NEXT:    retq
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to <1 x i64>
  %mmx_var1.i = bitcast <8 x i8> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.padds.b(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare <1 x i64> @llvm.x86.mmx.padd.q(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test36(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test36:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movq 8(%ebp), %mm0
; X86-NEXT:    paddq 16(%ebp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test36:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %mm0
; X64-NEXT:    movq %rsi, %mm1
; X64-NEXT:    paddq %mm0, %mm1
; X64-NEXT:    movq %mm1, %rax
; X64-NEXT:    retq
entry:
  %0 = extractelement <1 x i64> %a, i32 0
  %mmx_var = bitcast i64 %0 to <1 x i64>
  %1 = extractelement <1 x i64> %b, i32 0
  %mmx_var1 = bitcast i64 %1 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.padd.q(<1 x i64> %mmx_var, <1 x i64> %mmx_var1)
  %3 = bitcast <1 x i64> %2 to i64
  ret i64 %3
}

declare <1 x i64> @llvm.x86.mmx.padd.d(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test35(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test35:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $24, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 16(%ebp), %eax
; X86-NEXT:    movl 20(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    paddd {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test35:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rsi, %mm0
; X64-NEXT:    movq %rdi, %mm1
; X64-NEXT:    paddd %mm0, %mm1
; X64-NEXT:    movq %mm1, %rax
; X64-NEXT:    retq
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to <1 x i64>
  %mmx_var1.i = bitcast <2 x i32> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.padd.d(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare <1 x i64> @llvm.x86.mmx.padd.w(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test34(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test34:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $24, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 16(%ebp), %eax
; X86-NEXT:    movl 20(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    paddw {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test34:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rsi, %mm0
; X64-NEXT:    movq %rdi, %mm1
; X64-NEXT:    paddw %mm0, %mm1
; X64-NEXT:    movq %mm1, %rax
; X64-NEXT:    retq
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64>
  %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.padd.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare <1 x i64> @llvm.x86.mmx.padd.b(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test33(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test33:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $24, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 16(%ebp), %eax
; X86-NEXT:    movl 20(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    paddb {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test33:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rsi, %mm0
; X64-NEXT:    movq %rdi, %mm1
; X64-NEXT:    paddb %mm0, %mm1
; X64-NEXT:    movq %mm1, %rax
; X64-NEXT:    retq
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to <1 x i64>
  %mmx_var1.i = bitcast <8 x i8> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.padd.b(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare <1 x i64> @llvm.x86.mmx.psad.bw(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test32(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $24, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 16(%ebp), %eax
; X86-NEXT:    movl 20(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    psadbw {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rsi, %mm0
; X64-NEXT:    movq %rdi, %mm1
; X64-NEXT:    psadbw %mm0, %mm1
; X64-NEXT:    movq %mm1, %rax
; X64-NEXT:    retq
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to <1 x i64>
  %mmx_var1.i = bitcast <8 x i8> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.psad.bw(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to i64
  ret i64 %3
}

declare <1 x i64> @llvm.x86.mmx.pmins.w(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test31(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test31:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $24, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 16(%ebp), %eax
; X86-NEXT:    movl 20(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    pminsw {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test31:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rsi, %mm0
; X64-NEXT:    movq %rdi, %mm1
; X64-NEXT:    pminsw %mm0, %mm1
; X64-NEXT:    movq %mm1, %rax
; X64-NEXT:    retq
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64>
  %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.pmins.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare <1 x i64> @llvm.x86.mmx.pminu.b(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test30(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test30:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $24, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 16(%ebp), %eax
; X86-NEXT:    movl 20(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    pminub {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test30:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rsi, %mm0
; X64-NEXT:    movq %rdi, %mm1
; X64-NEXT:    pminub %mm0, %mm1
; X64-NEXT:    movq %mm1, %rax
; X64-NEXT:    retq
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to <1 x i64>
  %mmx_var1.i = bitcast <8 x i8> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.pminu.b(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare <1 x i64> @llvm.x86.mmx.pmaxs.w(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test29(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test29:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $24, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 16(%ebp), %eax
; X86-NEXT:    movl 20(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    pmaxsw {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test29:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rsi, %mm0
; X64-NEXT:    movq %rdi, %mm1
; X64-NEXT:    pmaxsw %mm0, %mm1
; X64-NEXT:    movq %mm1, %rax
; X64-NEXT:    retq
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64>
  %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.pmaxs.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare <1 x i64> @llvm.x86.mmx.pmaxu.b(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test28(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test28:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $24, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 16(%ebp), %eax
; X86-NEXT:    movl 20(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    pmaxub {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test28:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rsi, %mm0
; X64-NEXT:    movq %rdi, %mm1
; X64-NEXT:    pmaxub %mm0, %mm1
; X64-NEXT:    movq %mm1, %rax
; X64-NEXT:    retq
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to <1 x i64>
  %mmx_var1.i = bitcast <8 x i8> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.pmaxu.b(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare <1 x i64> @llvm.x86.mmx.pavg.w(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test27(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test27:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $24, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 16(%ebp), %eax
; X86-NEXT:    movl 20(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    pavgw {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test27:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rsi, %mm0
; X64-NEXT:    movq %rdi, %mm1
; X64-NEXT:    pavgw %mm0, %mm1
; X64-NEXT:    movq %mm1, %rax
; X64-NEXT:    retq
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64>
  %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.pavg.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare <1 x i64> @llvm.x86.mmx.pavg.b(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test26(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test26:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $24, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 16(%ebp), %eax
; X86-NEXT:    movl 20(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    pavgb {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test26:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rsi, %mm0
; X64-NEXT:    movq %rdi, %mm1
; X64-NEXT:    pavgb %mm0, %mm1
; X64-NEXT:    movq %mm1, %rax
; X64-NEXT:    retq
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to <1 x i64>
  %mmx_var1.i = bitcast <8 x i8> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.pavg.b(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare void @llvm.x86.mmx.movnt.dq(ptr, <1 x i64>) nounwind

define void @test25(ptr %p, <1 x i64> %a) nounwind optsize ssp {
; X86-LABEL: test25:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movntq %mm0, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: test25:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rsi, %mm0
; X64-NEXT:    movntq %mm0, (%rdi)
; X64-NEXT:    retq
entry:
  %0 = extractelement <1 x i64> %a, i32 0
  %mmx_var.i = bitcast i64 %0 to <1 x i64>
  tail call void @llvm.x86.mmx.movnt.dq(ptr %p, <1 x i64> %mmx_var.i) nounwind
  ret void
}

declare i32 @llvm.x86.mmx.pmovmskb(<1 x i64>) nounwind readnone

define i32 @test24(<1 x i64> %a) nounwind readnone optsize ssp {
; X86-LABEL: test24:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, (%esp)
; X86-NEXT:    movq (%esp), %mm0
; X86-NEXT:    pmovmskb %mm0, %eax
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test24:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %mm0
; X64-NEXT:    pmovmskb %mm0, %eax
; X64-NEXT:    retq
entry:
  %0 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %0 to <1 x i64>
  %1 = tail call i32 @llvm.x86.mmx.pmovmskb(<1 x i64> %mmx_var.i) nounwind
  ret i32 %1
}

declare void @llvm.x86.mmx.maskmovq(<1 x i64>, <1 x i64>, ptr) nounwind

define void @test23(<1 x i64> %d, <1 x i64> %n, ptr %p) nounwind optsize ssp {
; X86-LABEL: test23:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    pushl %edi
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $24, %esp
; X86-NEXT:    movl 16(%ebp), %eax
; X86-NEXT:    movl 20(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, (%esp)
; X86-NEXT:    movl 24(%ebp), %edi
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movq (%esp), %mm1
; X86-NEXT:    maskmovq %mm0, %mm1
; X86-NEXT:    leal -4(%ebp), %esp
; X86-NEXT:    popl %edi
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test23:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rsi, %mm0
; X64-NEXT:    movq %rdi, %mm1
; X64-NEXT:    movq %rdx, %rdi
; X64-NEXT:    maskmovq %mm0, %mm1
; X64-NEXT:    retq
entry:
  %0 = bitcast <1 x i64> %n to <8 x i8>
  %1 = bitcast <1 x i64> %d to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to <1 x i64>
  %mmx_var1.i = bitcast <8 x i8> %0 to <1 x i64>
  tail call void @llvm.x86.mmx.maskmovq(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i, ptr %p) nounwind
  ret void
}

declare <1 x i64> @llvm.x86.mmx.pmulhu.w(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test22(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test22:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $24, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 16(%ebp), %eax
; X86-NEXT:    movl 20(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    pmulhuw {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test22:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rsi, %mm0
; X64-NEXT:    movq %rdi, %mm1
; X64-NEXT:    pmulhuw %mm0, %mm1
; X64-NEXT:    movq %mm1, %rax
; X64-NEXT:    retq
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64>
  %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.pmulhu.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare <1 x i64> @llvm.x86.sse.pshuf.w(<1 x i64>, i8) nounwind readnone

define i64 @test21(<1 x i64> %a) nounwind readnone optsize ssp {
; X86-LABEL: test21:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $16, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    pshufw $3, {{[0-9]+}}(%esp), %mm0 # mm0 = mem[3,0,0,0]
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test21:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %mm0
; X64-NEXT:    pshufw $3, %mm0, %mm0 # mm0 = mm0[3,0,0,0]
; X64-NEXT:    movq %mm0, %rax
; X64-NEXT:    retq
entry:
  %0 = bitcast <1 x i64> %a to <4 x i16>
  %1 = bitcast <4 x i16> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.sse.pshuf.w(<1 x i64> %1, i8 3) nounwind readnone
  %3 = bitcast <1 x i64> %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

define i32 @test21_2(<1 x i64> %a) nounwind readnone optsize ssp {
; X86-LABEL: test21_2:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, (%esp)
; X86-NEXT:    pshufw $3, (%esp), %mm0 # mm0 = mem[3,0,0,0]
; X86-NEXT:    movd %mm0, %eax
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test21_2:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %mm0
; X64-NEXT:    pshufw $3, %mm0, %mm0 # mm0 = mm0[3,0,0,0]
; X64-NEXT:    movd %mm0, %eax
; X64-NEXT:    retq
entry:
  %0 = bitcast <1 x i64> %a to <4 x i16>
  %1 = bitcast <4 x i16> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.sse.pshuf.w(<1 x i64> %1, i8 3) nounwind readnone
  %3 = bitcast <1 x i64> %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <2 x i32>
  %5 = extractelement <2 x i32> %4, i32 0
  ret i32 %5
}

declare <1 x i64> @llvm.x86.mmx.pmulu.dq(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test20(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test20:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $24, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 16(%ebp), %eax
; X86-NEXT:    movl 20(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    pmuludq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test20:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rsi, %mm0
; X64-NEXT:    movq %rdi, %mm1
; X64-NEXT:    pmuludq %mm0, %mm1
; X64-NEXT:    movq %mm1, %rax
; X64-NEXT:    retq
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to <1 x i64>
  %mmx_var1.i = bitcast <2 x i32> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.pmulu.dq(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to i64
  ret i64 %3
}

declare <2 x double> @llvm.x86.sse.cvtpi2pd(<1 x i64>) nounwind readnone

define <2 x double> @test19(<1 x i64> %a) nounwind readnone optsize ssp {
; X86-LABEL: test19:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, (%esp)
; X86-NEXT:    cvtpi2pd (%esp), %xmm0
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test19:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %mm0
; X64-NEXT:    cvtpi2pd %mm0, %xmm0
; X64-NEXT:    retq
entry:
  %0 = bitcast <1 x i64> %a to <2 x i32>
  %1 = bitcast <2 x i32> %0 to <1 x i64>
  %2 = tail call <2 x double> @llvm.x86.sse.cvtpi2pd(<1 x i64> %1) nounwind readnone
  ret <2 x double> %2
}

declare <1 x i64> @llvm.x86.sse.cvttpd2pi(<2 x double>) nounwind readnone

define i64 @test18(<2 x double> %a) nounwind readnone optsize ssp {
; X86-LABEL: test18:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    cvttpd2pi %xmm0, %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test18:
; X64:       # %bb.0: # %entry
; X64-NEXT:    cvttpd2pi %xmm0, %mm0
; X64-NEXT:    movq %mm0, %rax
; X64-NEXT:    retq
entry:
  %0 = tail call <1 x i64> @llvm.x86.sse.cvttpd2pi(<2 x double> %a) nounwind readnone
  %1 = bitcast <1 x i64> %0 to <2 x i32>
  %2 = bitcast <2 x i32> %1 to <1 x i64>
  %3 = extractelement <1 x i64> %2, i32 0
  ret i64 %3
}

declare <1 x i64> @llvm.x86.sse.cvtpd2pi(<2 x double>) nounwind readnone

define i64 @test17(<2 x double> %a) nounwind readnone optsize ssp {
; X86-LABEL: test17:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    cvtpd2pi %xmm0, %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test17:
; X64:       # %bb.0: # %entry
; X64-NEXT:    cvtpd2pi %xmm0, %mm0
; X64-NEXT:    movq %mm0, %rax
; X64-NEXT:    retq
entry:
  %0 = tail call <1 x i64> @llvm.x86.sse.cvtpd2pi(<2 x double> %a) nounwind readnone
  %1 = bitcast <1 x i64> %0 to <2 x i32>
  %2 = bitcast <2 x i32> %1 to <1 x i64>
  %3 = extractelement <1 x i64> %2, i32 0
  ret i64 %3
}

declare <1 x i64> @llvm.x86.mmx.palignr.b(<1 x i64>, <1 x i64>, i8) nounwind readnone

define i64 @test16(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movq 8(%ebp), %mm0
; X86-NEXT:    palignr $16, 16(%ebp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %mm0
; X64-NEXT:    movq %rsi, %mm1
; X64-NEXT:    palignr $16, %mm1, %mm0
; X64-NEXT:    movq %mm0, %rax
; X64-NEXT:    retq
entry:
  %0 = extractelement <1 x i64> %a, i32 0
  %mmx_var = bitcast i64 %0 to <1 x i64>
  %1 = extractelement <1 x i64> %b, i32 0
  %mmx_var1 = bitcast i64 %1 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.palignr.b(<1 x i64> %mmx_var, <1 x i64> %mmx_var1, i8 16)
  %3 = bitcast <1 x i64> %2 to i64
  ret i64 %3
}

declare <1 x i64> @llvm.x86.ssse3.pabs.d(<1 x i64>) nounwind readnone

define i64 @test15(<1 x i64> %a) nounwind readnone optsize ssp {
; X86-LABEL: test15:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $16, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    pabsd {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test15:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %mm0
; X64-NEXT:    pabsd %mm0, %mm0
; X64-NEXT:    movq %mm0, %rax
; X64-NEXT:    retq
entry:
  %0 = bitcast <1 x i64> %a to <2 x i32>
  %1 = bitcast <2 x i32> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.ssse3.pabs.d(<1 x i64> %1) nounwind readnone
  %3 = bitcast <1 x i64> %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare <1 x i64> @llvm.x86.ssse3.pabs.w(<1 x i64>) nounwind readnone

define i64 @test14(<1 x i64> %a) nounwind readnone optsize ssp {
; X86-LABEL: test14:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $16, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    pabsw {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test14:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %mm0
; X64-NEXT:    pabsw %mm0, %mm0
; X64-NEXT:    movq %mm0, %rax
; X64-NEXT:    retq
entry:
  %0 = bitcast <1 x i64> %a to <4 x i16>
  %1 = bitcast <4 x i16> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.ssse3.pabs.w(<1 x i64> %1) nounwind readnone
  %3 = bitcast <1 x i64> %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare <1 x i64> @llvm.x86.ssse3.pabs.b(<1 x i64>) nounwind readnone

define i64 @test13(<1 x i64> %a) nounwind readnone optsize ssp {
; X86-LABEL: test13:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $16, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    pabsb {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test13:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %mm0
; X64-NEXT:    pabsb %mm0, %mm0
; X64-NEXT:    movq %mm0, %rax
; X64-NEXT:    retq
entry:
  %0 = bitcast <1 x i64> %a to <8 x i8>
  %1 = bitcast <8 x i8> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.ssse3.pabs.b(<1 x i64> %1) nounwind readnone
  %3 = bitcast <1 x i64> %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare <1 x i64> @llvm.x86.ssse3.psign.d(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test12(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test12:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $24, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 16(%ebp), %eax
; X86-NEXT:    movl 20(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    psignd {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test12:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rsi, %mm0
; X64-NEXT:    movq %rdi, %mm1
; X64-NEXT:    psignd %mm0, %mm1
; X64-NEXT:    movq %mm1, %rax
; X64-NEXT:    retq
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %2 = bitcast <2 x i32> %1 to <1 x i64>
  %3 = bitcast <2 x i32> %0 to <1 x i64>
  %4 = tail call <1 x i64> @llvm.x86.ssse3.psign.d(<1 x i64> %2, <1 x i64> %3) nounwind readnone
  %5 = bitcast <1 x i64> %4 to <2 x i32>
  %6 = bitcast <2 x i32> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}

declare <1 x i64> @llvm.x86.ssse3.psign.w(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test11(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test11:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $24, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 16(%ebp), %eax
; X86-NEXT:    movl 20(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    psignw {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test11:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rsi, %mm0
; X64-NEXT:    movq %rdi, %mm1
; X64-NEXT:    psignw %mm0, %mm1
; X64-NEXT:    movq %mm1, %rax
; X64-NEXT:    retq
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %2 = bitcast <4 x i16> %1 to <1 x i64>
  %3 = bitcast <4 x i16> %0 to <1 x i64>
  %4 = tail call <1 x i64> @llvm.x86.ssse3.psign.w(<1 x i64> %2, <1 x i64> %3) nounwind readnone
  %5 = bitcast <1 x i64> %4 to <4 x i16>
  %6 = bitcast <4 x i16> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}

declare <1 x i64> @llvm.x86.ssse3.psign.b(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test10(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test10:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $24, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 16(%ebp), %eax
; X86-NEXT:    movl 20(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    psignb {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test10:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rsi, %mm0
; X64-NEXT:    movq %rdi, %mm1
; X64-NEXT:    psignb %mm0, %mm1
; X64-NEXT:    movq %mm1, %rax
; X64-NEXT:    retq
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %2 = bitcast <8 x i8> %1 to <1 x i64>
  %3 = bitcast <8 x i8> %0 to <1 x i64>
  %4 = tail call <1 x i64> @llvm.x86.ssse3.psign.b(<1 x i64> %2, <1 x i64> %3) nounwind readnone
  %5 = bitcast <1 x i64> %4 to <8 x i8>
  %6 = bitcast <8 x i8> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}

declare <1 x i64> @llvm.x86.ssse3.pshuf.b(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test9(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test9:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $24, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 16(%ebp), %eax
; X86-NEXT:    movl 20(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    pshufb {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test9:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rsi, %mm0
; X64-NEXT:    movq %rdi, %mm1
; X64-NEXT:    pshufb %mm0, %mm1
; X64-NEXT:    movq %mm1, %rax
; X64-NEXT:    retq
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %2 = bitcast <8 x i8> %1 to <1 x i64>
  %3 = bitcast <8 x i8> %0 to <1 x i64>
  %4 = tail call <1 x i64> @llvm.x86.ssse3.pshuf.b(<1 x i64> %2, <1 x i64> %3) nounwind readnone
  %5 = bitcast <1 x i64> %4 to <8 x i8>
  %6 = bitcast <8 x i8> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}

declare <1 x i64> @llvm.x86.ssse3.pmul.hr.sw(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test8(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $24, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 16(%ebp), %eax
; X86-NEXT:    movl 20(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    pmulhrsw {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rsi, %mm0
; X64-NEXT:    movq %rdi, %mm1
; X64-NEXT:    pmulhrsw %mm0, %mm1
; X64-NEXT:    movq %mm1, %rax
; X64-NEXT:    retq
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %2 = bitcast <4 x i16> %1 to <1 x i64>
  %3 = bitcast <4 x i16> %0 to <1 x i64>
  %4 = tail call <1 x i64> @llvm.x86.ssse3.pmul.hr.sw(<1 x i64> %2, <1 x i64> %3) nounwind readnone
  %5 = bitcast <1 x i64> %4 to <4 x i16>
  %6 = bitcast <4 x i16> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}

declare <1 x i64> @llvm.x86.ssse3.pmadd.ub.sw(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test7(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test7:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $24, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 16(%ebp), %eax
; X86-NEXT:    movl 20(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    pmaddubsw {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test7:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rsi, %mm0
; X64-NEXT:    movq %rdi, %mm1
; X64-NEXT:    pmaddubsw %mm0, %mm1
; X64-NEXT:    movq %mm1, %rax
; X64-NEXT:    retq
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %2 = bitcast <8 x i8> %1 to <1 x i64>
  %3 = bitcast <8 x i8> %0 to <1 x i64>
  %4 = tail call <1 x i64> @llvm.x86.ssse3.pmadd.ub.sw(<1 x i64> %2, <1 x i64> %3) nounwind readnone
  %5 = bitcast <1 x i64> %4 to <8 x i8>
  %6 = bitcast <8 x i8> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}

declare <1 x i64> @llvm.x86.ssse3.phsub.sw(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test6(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test6:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $24, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 16(%ebp), %eax
; X86-NEXT:    movl 20(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    phsubsw {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test6:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rsi, %mm0
; X64-NEXT:    movq %rdi, %mm1
; X64-NEXT:    phsubsw %mm0, %mm1
; X64-NEXT:    movq %mm1, %rax
; X64-NEXT:    retq
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %2 = bitcast <4 x i16> %1 to <1 x i64>
  %3 = bitcast <4 x i16> %0 to <1 x i64>
  %4 = tail call <1 x i64> @llvm.x86.ssse3.phsub.sw(<1 x i64> %2, <1 x i64> %3) nounwind readnone
  %5 = bitcast <1 x i64> %4 to <4 x i16>
  %6 = bitcast <4 x i16> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}

declare <1 x i64> @llvm.x86.ssse3.phsub.d(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test5(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test5:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $24, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 16(%ebp), %eax
; X86-NEXT:    movl 20(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    phsubd {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test5:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rsi, %mm0
; X64-NEXT:    movq %rdi, %mm1
; X64-NEXT:    phsubd %mm0, %mm1
; X64-NEXT:    movq %mm1, %rax
; X64-NEXT:    retq
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %2 = bitcast <2 x i32> %1 to <1 x i64>
  %3 = bitcast <2 x i32> %0 to <1 x i64>
  %4 = tail call <1 x i64> @llvm.x86.ssse3.phsub.d(<1 x i64> %2, <1 x i64> %3) nounwind readnone
  %5 = bitcast <1 x i64> %4 to <2 x i32>
  %6 = bitcast <2 x i32> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}

declare <1 x i64> @llvm.x86.ssse3.phsub.w(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test4(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test4:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $24, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 16(%ebp), %eax
; X86-NEXT:    movl 20(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    phsubw {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test4:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rsi, %mm0
; X64-NEXT:    movq %rdi, %mm1
; X64-NEXT:    phsubw %mm0, %mm1
; X64-NEXT:    movq %mm1, %rax
; X64-NEXT:    retq
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %2 = bitcast <4 x i16> %1 to <1 x i64>
  %3 = bitcast <4 x i16> %0 to <1 x i64>
  %4 = tail call <1 x i64> @llvm.x86.ssse3.phsub.w(<1 x i64> %2, <1 x i64> %3) nounwind readnone
  %5 = bitcast <1 x i64> %4 to <4 x i16>
  %6 = bitcast <4 x i16> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}

declare <1 x i64> @llvm.x86.ssse3.phadd.sw(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test3(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test3:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $24, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 16(%ebp), %eax
; X86-NEXT:    movl 20(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    phaddsw {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test3:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rsi, %mm0
; X64-NEXT:    movq %rdi, %mm1
; X64-NEXT:    phaddsw %mm0, %mm1
; X64-NEXT:    movq %mm1, %rax
; X64-NEXT:    retq
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %2 = bitcast <4 x i16> %1 to <1 x i64>
  %3 = bitcast <4 x i16> %0 to <1 x i64>
  %4 = tail call <1 x i64> @llvm.x86.ssse3.phadd.sw(<1 x i64> %2, <1 x i64> %3) nounwind readnone
  %5 = bitcast <1 x i64> %4 to <4 x i16>
  %6 = bitcast <4 x i16> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}

declare <1 x i64> @llvm.x86.ssse3.phadd.d(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test2(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-LABEL: test2:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $24, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 16(%ebp), %eax
; X86-NEXT:    movl 20(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    phaddd {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test2:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rsi, %mm0
; X64-NEXT:    movq %rdi, %mm1
; X64-NEXT:    phaddd %mm0, %mm1
; X64-NEXT:    movq %mm1, %rax
; X64-NEXT:    retq
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %2 = bitcast <2 x i32> %1 to <1 x i64>
  %3 = bitcast <2 x i32> %0 to <1 x i64>
  %4 = tail call <1 x i64> @llvm.x86.ssse3.phadd.d(<1 x i64> %2, <1 x i64> %3) nounwind readnone
  %5 = bitcast <1 x i64> %4 to <2 x i32>
  %6 = bitcast <2 x i32> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}

define <4 x float> @test89(<4 x float> %a, <1 x i64> %b) nounwind {
; X86-LABEL: test89:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, (%esp)
; X86-NEXT:    cvtpi2ps (%esp), %xmm0
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test89:
; X64:       # %bb.0:
; X64-NEXT:    movq %rdi, %mm0
; X64-NEXT:    cvtpi2ps %mm0, %xmm0
; X64-NEXT:    retq
  %c = tail call <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float> %a, <1 x i64> %b)
  ret <4 x float> %c
}

declare <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float>, <1 x i64>) nounwind readnone

define void @test90() {
; ALL-LABEL: test90:
; ALL:       # %bb.0:
; ALL-NEXT:    emms
; ALL-NEXT:    ret{{[l|q]}}
  call void @llvm.x86.mmx.emms()
  ret void
}

declare void @llvm.x86.mmx.emms()

define <1 x i64> @test_mm_insert_pi16(<1 x i64> %a.coerce, i32 %d) nounwind {
; X86-LABEL: test_mm_insert_pi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $16, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    pinsrw $2, 16(%ebp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_insert_pi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %mm0
; X64-NEXT:    pinsrw $2, %esi, %mm0
; X64-NEXT:    movq %mm0, %rax
; X64-NEXT:    retq
entry:
  %1 = tail call <1 x i64> @llvm.x86.mmx.pinsr.w(<1 x i64> %a.coerce, i32 %d, i32 2)
  ret <1 x i64> %1
}

declare <1 x i64> @llvm.x86.mmx.pinsr.w(<1 x i64>, i32, i32 immarg)

define i32 @test_mm_extract_pi16(<1 x i64> %a.coerce) nounwind {
; X86-LABEL: test_mm_extract_pi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, (%esp)
; X86-NEXT:    movq (%esp), %mm0
; X86-NEXT:    pextrw $2, %mm0, %eax
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_extract_pi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %mm0
; X64-NEXT:    pextrw $2, %mm0, %eax
; X64-NEXT:    retq
entry:
  %1 = tail call i32 @llvm.x86.mmx.pextr.w(<1 x i64> %a.coerce, i32 2)
  ret i32 %1
}

declare i32 @llvm.x86.mmx.pextr.w(<1 x i64>, i32 immarg)
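; The two tests above exercise the lowering behind the Intel intrinsics
; _mm_insert_pi16 and _mm_extract_pi16. A minimal C usage sketch (an
; illustrative assumption, not part of the checked tests; variables a and d
; are hypothetical):
;   __m64 v = _mm_insert_pi16(a, d, 2);  /* selects pinsrw $2 */
;   int   e = _mm_extract_pi16(v, 2);    /* selects pextrw $2 */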