; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s

; Known-bits propagation through the x86 PHADD/PHSUB intrinsics: masking the
; inputs makes bits of each horizontal result provably zero, so the compares,
; selects, shifts, and truncates below can fold. The vector constants are
; chosen to be consistent with the generated CHECK lines.

; Each input lane is masked to [0,3], so every hadd lane is at most 6 and the
; ule-8 compare is always true: the select always returns the zero vector.
define <4 x i32> @hadd_select_v4i32(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: hadd_select_v4i32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    retq
entry:
  %and1 = and <4 x i32> %x, <i32 3, i32 3, i32 3, i32 3>
  %and2 = and <4 x i32> %y, <i32 3, i32 3, i32 3, i32 3>
  %hadd = tail call <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32> %and1, <4 x i32> %and2)
  %cond = icmp ule <4 x i32> %hadd, <i32 8, i32 8, i32 8, i32 8>
  %ret = select <4 x i1> %cond, <4 x i32> zeroinitializer, <4 x i32> %hadd
  ret <4 x i32> %ret
}

define <8 x i8> @hadd_trunc_v8i16(<8 x i16> %x, <8 x i16> %y) {
; CHECK-LABEL: hadd_trunc_v8i16:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpbroadcastw {{.*#+}} xmm2 = [3,3,3,3,3,3,3,3]
; CHECK-NEXT:    vpand %xmm2, %xmm0, %xmm0
; CHECK-NEXT:    vpand %xmm2, %xmm1, %xmm1
; CHECK-NEXT:    vphaddw %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vpackuswb %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    retq
entry:
  %and1 = and <8 x i16> %x, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  %and2 = and <8 x i16> %y, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  %hadd = tail call <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16> %and1, <8 x i16> %and2)
  %conv = trunc <8 x i16> %hadd to <8 x i8>
  ret <8 x i8> %conv
}

define <8 x i16> @hadd_trunc_v8i32(<8 x i32> %x, <8 x i32> %y) {
; CHECK-LABEL: hadd_trunc_v8i32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpbroadcastd {{.*#+}} ymm2 = [3,3,3,3,3,3,3,3]
; CHECK-NEXT:    vpand %ymm2, %ymm0, %ymm0
; CHECK-NEXT:    vpand %ymm2, %ymm1, %ymm1
; CHECK-NEXT:    vphaddd %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    vextracti128 $1, %ymm0, %xmm1
; CHECK-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
entry:
  %and1 = and <8 x i32> %x, <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  %and2 = and <8 x i32> %y, <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  %hadd = tail call <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32> %and1, <8 x i32> %and2)
  %conv = trunc <8 x i32> %hadd to <8 x i16>
  ret <8 x i16> %conv
}

define <16 x i8> @hadd_trunc_v16i16(<16 x i16> %x, <16 x i16> %y) {
; CHECK-LABEL: hadd_trunc_v16i16:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpbroadcastw {{.*#+}} ymm2 = [3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3]
; CHECK-NEXT:    vpand %ymm2, %ymm0, %ymm0
; CHECK-NEXT:    vpand %ymm2, %ymm1, %ymm1
; CHECK-NEXT:    vphaddw %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    vextracti128 $1, %ymm0, %xmm1
; CHECK-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
entry:
  %and1 = and <16 x i16> %x, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  %and2 = and <16 x i16> %y, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  %hadd = tail call <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16> %and1, <16 x i16> %and2)
  %conv = trunc <16 x i16> %hadd to <16 x i8>
  ret <16 x i8> %conv
}

; For the hsub tests, ORing both inputs with the same low-bits mask forces the
; low bits of every operand pair to be equal, so those bits of each horizontal
; difference are known zero.
define <4 x i32> @hsub_select_shl_v4i32(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: hsub_select_shl_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %or1 = or <4 x i32> %x, <i32 65535, i32 65535, i32 65535, i32 65535>
  %or2 = or <4 x i32> %y, <i32 65535, i32 65535, i32 65535, i32 65535>
  %hsub = tail call <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32> %or1, <4 x i32> %or2)
  %shl = shl <4 x i32> %hsub, <i32 16, i32 16, i32 16, i32 16>
  %cond = icmp ule <4 x i32> %shl, <i32 65535, i32 65535, i32 65535, i32 65535>
  %ret = select <4 x i1> %cond, <4 x i32> zeroinitializer, <4 x i32> %hsub
  ret <4 x i32> %ret
}

define <8 x i8> @hsub_trunc_v8i16(<8 x i16> %x, <8 x i16> %y) {
; CHECK-LABEL: hsub_trunc_v8i16:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    retq
entry:
  %or1 = or <8 x i16> %x, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
  %or2 = or <8 x i16> %y, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
  %hsub = tail call <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16> %or1, <8 x i16> %or2)
  %conv = trunc <8 x i16> %hsub to <8 x i8>
  ret <8 x i8> %conv
}
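
; The 256-bit AVX2 hsub variants below fold the same way: every bit kept by
; the truncation is known zero, so each function returns the zero vector.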
define <8 x i16> @hsub_trunc_v8i32(<8 x i32> %x, <8 x i32> %y) {
; CHECK-LABEL: hsub_trunc_v8i32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    retq
entry:
  %or1 = or <8 x i32> %x, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
  %or2 = or <8 x i32> %y, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
  %hsub = tail call <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32> %or1, <8 x i32> %or2)
  %conv = trunc <8 x i32> %hsub to <8 x i16>
  ret <8 x i16> %conv
}

define <16 x i8> @hsub_trunc_v16i16(<16 x i16> %x, <16 x i16> %y) {
; CHECK-LABEL: hsub_trunc_v16i16:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    retq
entry:
  %or1 = or <16 x i16> %x, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
  %or2 = or <16 x i16> %y, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
  %hsub = tail call <16 x i16> @llvm.x86.avx2.phsub.w(<16 x i16> %or1, <16 x i16> %or2)
  %conv = trunc <16 x i16> %hsub to <16 x i8>
  ret <16 x i8> %conv
}

; The extract tests keep a single lane of the horizontal add, so only the
; operand elements feeding that lane remain demanded.
define <8 x i16> @hadd_extract_2st_trunc_v8i32(<8 x i32> %x, <8 x i32> %y) {
; CHECK-LABEL: hadd_extract_2st_trunc_v8i32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-NEXT:    vphaddd %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    vpshufb {{.*#+}} xmm0 = zero,zero,xmm0[4,5],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
entry:
  %and1 = and <8 x i32> %x, <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  %and2 = and <8 x i32> %y, <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  %hadd = tail call <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32> %and1, <8 x i32> %and2)
  %andr = and <8 x i32> %hadd, <i32 0, i32 -1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %conv = trunc <8 x i32> %andr to <8 x i16>
  ret <8 x i16> %conv
}

define <8 x i16> @hadd_extract_8th_trunc_v8i32(<8 x i32> %x, <8 x i32> %y) {
; CHECK-LABEL: hadd_extract_8th_trunc_v8i32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
; CHECK-NEXT:    vphaddd %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,16,17,20,21,24,25,28,29,u,u,u,u,u,u,u,u]
; CHECK-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5,6],xmm0[7]
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
entry:
  %and1 = and <8 x i32> %x, <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  %and2 = and <8 x i32> %y, <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  %hadd = tail call <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32> %and1, <8 x i32> %and2)
  %andr = and <8 x i32> %hadd, <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 -1>
  %conv = trunc <8 x i32> %andr to <8 x i16>
  ret <8 x i16> %conv
}

; The AND with 7 on the surviving lane is redundant (the lane value is at most
; 6), so this compiles exactly like hadd_extract_2st_trunc_v8i32 above.
define <8 x i16> @hadd_extract_2st_trunc_redundant_and_v4i32(<8 x i32> %x, <8 x i32> %y) {
; CHECK-LABEL: hadd_extract_2st_trunc_redundant_and_v4i32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-NEXT:    vphaddd %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    vpshufb {{.*#+}} xmm0 = zero,zero,xmm0[4,5],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
entry:
  %and1 = and <8 x i32> %x, <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  %and2 = and <8 x i32> %y, <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  %hadd = tail call <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32> %and1, <8 x i32> %and2)
  %andr = and <8 x i32> %hadd, <i32 0, i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %conv = trunc <8 x i32> %andr to <8 x i16>
  ret <8 x i16> %conv
}
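
; Only the 8th lane of the hadd result survives the mask in the next test, and
; that lane is computed purely from %y; note that the splat-3 AND on %x is
; still emitted.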
define <8 x i16> @hadd_extract_4th_trunc_redundant_and_v4i32(<8 x i32> %x, <8 x i32> %y) {
; CHECK-LABEL: hadd_extract_4th_trunc_redundant_and_v4i32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpbroadcastd {{.*#+}} ymm2 = [3,3,3,3,3,3,3,3]
; CHECK-NEXT:    vpand %ymm2, %ymm0, %ymm0
; CHECK-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
; CHECK-NEXT:    vphaddd %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,16,17,20,21,24,25,28,29,u,u,u,u,u,u,u,u]
; CHECK-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5,6],xmm0[7]
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
entry:
  %and1 = and <8 x i32> %x, <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  %and2 = and <8 x i32> %y, <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  %hadd = tail call <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32> %and1, <8 x i32> %and2)
  %andr = and <8 x i32> %hadd, <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 7>
  %conv = trunc <8 x i32> %andr to <8 x i16>
  ret <8 x i16> %conv
}
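
; Declarations for the x86 horizontal add/sub intrinsics exercised above.
declare <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32>, <4 x i32>)
declare <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16>, <8 x i16>)
declare <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32>, <8 x i32>)
declare <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16>, <16 x i16>)
declare <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32>, <4 x i32>)
declare <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16>, <8 x i16>)
declare <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32>, <8 x i32>)
declare <16 x i16> @llvm.x86.avx2.phsub.w(<16 x i16>, <16 x i16>)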