; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=instcombine -mtriple=x86_64-unknown-unknown -S | FileCheck %s

;
; UNDEF Elts
;

define <8 x i16> @undef_pmulhu_128(<8 x i16> %a0) {
; CHECK-LABEL: @undef_pmulhu_128(
; CHECK-NEXT:    ret <8 x i16> zeroinitializer
;
  %1 = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %a0, <8 x i16> undef)
  ret <8 x i16> %1
}

define <8 x i16> @undef_pmulhu_128_commute(<8 x i16> %a0) {
; CHECK-LABEL: @undef_pmulhu_128_commute(
; CHECK-NEXT:    ret <8 x i16> zeroinitializer
;
  %1 = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> undef, <8 x i16> %a0)
  ret <8 x i16> %1
}

define <16 x i16> @undef_pmulhu_256(<16 x i16> %a0) {
; CHECK-LABEL: @undef_pmulhu_256(
; CHECK-NEXT:    ret <16 x i16> zeroinitializer
;
  %1 = call <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16> %a0, <16 x i16> undef)
  ret <16 x i16> %1
}

define <16 x i16> @undef_pmulhu_256_commute(<16 x i16> %a0) {
; CHECK-LABEL: @undef_pmulhu_256_commute(
; CHECK-NEXT:    ret <16 x i16> zeroinitializer
;
  %1 = call <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16> undef, <16 x i16> %a0)
  ret <16 x i16> %1
}

define <32 x i16> @undef_pmulhu_512(<32 x i16> %a0) {
; CHECK-LABEL: @undef_pmulhu_512(
; CHECK-NEXT:    ret <32 x i16> zeroinitializer
;
  %1 = call <32 x i16> @llvm.x86.avx512.pmulhu.w.512(<32 x i16> %a0, <32 x i16> undef)
  ret <32 x i16> %1
}

define <32 x i16> @undef_pmulhu_512_commute(<32 x i16> %a0) {
; CHECK-LABEL: @undef_pmulhu_512_commute(
; CHECK-NEXT:    ret <32 x i16> zeroinitializer
;
  %1 = call <32 x i16> @llvm.x86.avx512.pmulhu.w.512(<32 x i16> undef, <32 x i16> %a0)
  ret <32 x i16> %1
}

;
; Zero Elts
;

define <8 x i16> @zero_pmulhu_128(<8 x i16> %a0) {
; CHECK-LABEL: @zero_pmulhu_128(
; CHECK-NEXT:    ret <8 x i16> zeroinitializer
;
  %1 = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %a0, <8 x i16> zeroinitializer)
  ret <8 x i16> %1
}

define <8 x i16> @zero_pmulhu_128_commute(<8 x i16> %a0) {
; CHECK-LABEL: @zero_pmulhu_128_commute(
; CHECK-NEXT:    ret <8 x i16> zeroinitializer
;
  %1 = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> zeroinitializer, <8 x i16> %a0)
  ret <8 x i16> %1
}

define <16 x i16> @zero_pmulhu_256(<16 x i16> %a0) {
; CHECK-LABEL: @zero_pmulhu_256(
; CHECK-NEXT:    ret <16 x i16> zeroinitializer
;
  %1 = call <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16> %a0, <16 x i16> zeroinitializer)
  ret <16 x i16> %1
}

define <16 x i16> @zero_pmulhu_256_commute(<16 x i16> %a0) {
; CHECK-LABEL: @zero_pmulhu_256_commute(
; CHECK-NEXT:    ret <16 x i16> zeroinitializer
;
  %1 = call <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16> zeroinitializer, <16 x i16> %a0)
  ret <16 x i16> %1
}

define <32 x i16> @zero_pmulhu_512(<32 x i16> %a0) {
; CHECK-LABEL: @zero_pmulhu_512(
; CHECK-NEXT:    ret <32 x i16> zeroinitializer
;
  %1 = call <32 x i16> @llvm.x86.avx512.pmulhu.w.512(<32 x i16> %a0, <32 x i16> zeroinitializer)
  ret <32 x i16> %1
}

define <32 x i16> @zero_pmulhu_512_commute(<32 x i16> %a0) {
; CHECK-LABEL: @zero_pmulhu_512_commute(
; CHECK-NEXT:    ret <32 x i16> zeroinitializer
;
  %1 = call <32 x i16> @llvm.x86.avx512.pmulhu.w.512(<32 x i16> zeroinitializer, <32 x i16> %a0)
  ret <32 x i16> %1
}

;
; Multiply by One
;
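; pmulhu.w zero-extends each 16-bit lane to 32 bits, multiplies, and returns the
; high 16 bits of each product. A 16-bit value multiplied by 1 never exceeds 16
; bits, so a splat-of-1 operand (assumed here for these "one_" tests) folds the
; result to zero.
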
define <8 x i16> @one_pmulhu_128(<8 x i16> %a0) {
; CHECK-LABEL: @one_pmulhu_128(
; CHECK-NEXT:    ret <8 x i16> zeroinitializer
;
  %1 = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %a0, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
  ret <8 x i16> %1
}

define <8 x i16> @one_pmulhu_128_commute(<8 x i16> %a0) {
; CHECK-LABEL: @one_pmulhu_128_commute(
; CHECK-NEXT:    ret <8 x i16> zeroinitializer
;
  %1 = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <8 x i16> %a0)
  ret <8 x i16> %1
}

define <16 x i16> @one_pmulhu_256(<16 x i16> %a0) {
; CHECK-LABEL: @one_pmulhu_256(
; CHECK-NEXT:    ret <16 x i16> zeroinitializer
;
  %1 = call <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16> %a0, <16 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
  ret <16 x i16> %1
}

define <16 x i16> @one_pmulhu_256_commute(<16 x i16> %a0) {
; CHECK-LABEL: @one_pmulhu_256_commute(
; CHECK-NEXT:    ret <16 x i16> zeroinitializer
;
  %1 = call <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <16 x i16> %a0)
  ret <16 x i16> %1
}

define <32 x i16> @one_pmulhu_512(<32 x i16> %a0) {
; CHECK-LABEL: @one_pmulhu_512(
; CHECK-NEXT:    ret <32 x i16> zeroinitializer
;
  %1 = call <32 x i16> @llvm.x86.avx512.pmulhu.w.512(<32 x i16> %a0, <32 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
  ret <32 x i16> %1
}

define <32 x i16> @one_pmulhu_512_commute(<32 x i16> %a0) {
; CHECK-LABEL: @one_pmulhu_512_commute(
; CHECK-NEXT:    ret <32 x i16> zeroinitializer
;
  %1 = call <32 x i16> @llvm.x86.avx512.pmulhu.w.512(<32 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <32 x i16> %a0)
  ret <32 x i16> %1
}

;
; Constant Folding
;

define <8 x i16> @fold_pmulhu_128() {
; CHECK-LABEL: @fold_pmulhu_128(
; CHECK-NEXT:    ret <8 x i16>
;
  %1 = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> , <8 x i16> )
  ret <8 x i16> %1
}

define <16 x i16> @fold_pmulhu_256() {
; CHECK-LABEL: @fold_pmulhu_256(
; CHECK-NEXT:    ret <16 x i16>
;
  %1 = call <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16> , <16 x i16> )
  ret <16 x i16> %1
}

define <32 x i16> @fold_pmulhu_512() {
; CHECK-LABEL: @fold_pmulhu_512(
; CHECK-NEXT:    ret <32 x i16>
;
  %1 = call <32 x i16> @llvm.x86.avx512.pmulhu.w.512(<32 x i16> , <32 x i16> )
  ret <32 x i16> %1
}

;
; Demanded Elts
;
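; Only lane 0 of each pmulhu result is demanded (the trailing splat shuffle picks
; element 0), so InstCombine drops the input shuffles and passes %a0/%a1 to the
; intrinsic directly, as the CHECK lines below show.
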
define <8 x i16> @elts_pmulhu_128(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: @elts_pmulhu_128(
; CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> [[A0:%.*]], <8 x i16> [[A1:%.*]])
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> zeroinitializer
; CHECK-NEXT:    ret <8 x i16> [[TMP2]]
;
  %1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32>
  %2 = shufflevector <8 x i16> %a1, <8 x i16> undef, <8 x i32>
  %3 = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %1, <8 x i16> %2)
  %4 = shufflevector <8 x i16> %3, <8 x i16> poison, <8 x i32> zeroinitializer
  ret <8 x i16> %4
}

define <16 x i16> @elts_pmulhu_256(<16 x i16> %a0, <16 x i16> %a1) {
; CHECK-LABEL: @elts_pmulhu_256(
; CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16> [[A0:%.*]], <16 x i16> [[A1:%.*]])
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <16 x i16> [[TMP1]], <16 x i16> poison, <16 x i32> zeroinitializer
; CHECK-NEXT:    ret <16 x i16> [[TMP2]]
;
  %1 = shufflevector <16 x i16> %a0, <16 x i16> undef, <16 x i32>
  %2 = shufflevector <16 x i16> %a1, <16 x i16> undef, <16 x i32>
  %3 = call <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16> %1, <16 x i16> %2)
  %4 = shufflevector <16 x i16> %3, <16 x i16> poison, <16 x i32> zeroinitializer
  ret <16 x i16> %4
}

define <32 x i16> @elts_pmulhu_512(<32 x i16> %a0, <32 x i16> %a1) {
; CHECK-LABEL: @elts_pmulhu_512(
; CHECK-NEXT:    [[TMP1:%.*]] = call <32 x i16> @llvm.x86.avx512.pmulhu.w.512(<32 x i16> [[A0:%.*]], <32 x i16> [[A1:%.*]])
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <32 x i16> [[TMP1]], <32 x i16> poison, <32 x i32> zeroinitializer
; CHECK-NEXT:    ret <32 x i16> [[TMP2]]
;
  %1 = shufflevector <32 x i16> %a0, <32 x i16> undef, <32 x i32>
  %2 = shufflevector <32 x i16> %a1, <32 x i16> undef, <32 x i32>
  %3 = call <32 x i16> @llvm.x86.avx512.pmulhu.w.512(<32 x i16> %1, <32 x i16> %2)
  %4 = shufflevector <32 x i16> %3, <32 x i16> poison, <32 x i32> zeroinitializer
  ret <32 x i16> %4
}

;
; Known Bits
;

; The lshr/and mask constants bound both operands so that the full 32-bit product
; fits in 16 bits; the high half returned by pmulhu is then known zero and the add
; of %m folds away.

define <8 x i16> @known_pmulhu_128(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> %a2) {
; CHECK-LABEL: @known_pmulhu_128(
; CHECK-NEXT:    ret <8 x i16> [[A2:%.*]]
;
  %x0 = lshr <8 x i16> %a0,
  %x1 = and <8 x i16> %a1,
  %m = tail call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %x0, <8 x i16> %x1)
  %r = add <8 x i16> %m, %a2
  ret <8 x i16> %r
}

define <16 x i16> @known_pmulhu_256(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> %a2) {
; CHECK-LABEL: @known_pmulhu_256(
; CHECK-NEXT:    ret <16 x i16> [[A2:%.*]]
;
  %x0 = lshr <16 x i16> %a0,
  %x1 = and <16 x i16> %a1,
  %m = tail call <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16> %x0, <16 x i16> %x1)
  %r = add <16 x i16> %m, %a2
  ret <16 x i16> %r
}

define <32 x i16> @known_pmulhu_512(<32 x i16> %a0, <32 x i16> %a1, <32 x i16> %a2) {
; CHECK-LABEL: @known_pmulhu_512(
; CHECK-NEXT:    ret <32 x i16> [[A2:%.*]]
;
  %x0 = lshr <32 x i16> %a0,
  %x1 = and <32 x i16> %a1,
  %m = tail call <32 x i16> @llvm.x86.avx512.pmulhu.w.512(<32 x i16> %x0, <32 x i16> %x1)
  %r = add <32 x i16> %m, %a2
  ret <32 x i16> %r
}
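
; For reference, a generic-IR expansion equivalent to the 128-bit pmulhu semantics.
; @ref_pmulhu_128 is an illustrative helper (not part of the original test) and has
; no CHECK lines of its own.
define <8 x i16> @ref_pmulhu_128(<8 x i16> %a0, <8 x i16> %a1) {
  %x0 = zext <8 x i16> %a0 to <8 x i32>
  %x1 = zext <8 x i16> %a1 to <8 x i32>
  %m = mul <8 x i32> %x0, %x1
  %hi = lshr <8 x i32> %m, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
  %r = trunc <8 x i32> %hi to <8 x i16>
  ret <8 x i16> %r
}

; Declarations for the intrinsics exercised above (standard LLVM signatures).
declare <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16>, <8 x i16>)
declare <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16>, <16 x i16>)
declare <32 x i16> @llvm.x86.avx512.pmulhu.w.512(<32 x i16>, <32 x i16>)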