; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=instcombine -mtriple=x86_64-unknown-unknown -S | FileCheck %s

; InstCombine tests for the X86 PMADDWD intrinsics (SSE2 / AVX2 / AVX512 forms).
; Each i32 result lane i is a[2i]*b[2i] + a[2i+1]*b[2i+1], with the i16 inputs
; sign-extended to i32 before multiplying.

;
; UNDEF Elts
;
; An undef operand on either side is expected to fold the whole call to
; zeroinitializer (not to undef).

define <4 x i32> @undef_pmaddwd_128(<8 x i16> %a0) {
; CHECK-LABEL: @undef_pmaddwd_128(
; CHECK-NEXT:    ret <4 x i32> zeroinitializer
;
  %1 = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %a0, <8 x i16> undef)
  ret <4 x i32> %1
}

define <4 x i32> @undef_pmaddwd_128_commute(<8 x i16> %a0) {
; CHECK-LABEL: @undef_pmaddwd_128_commute(
; CHECK-NEXT:    ret <4 x i32> zeroinitializer
;
  %1 = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> undef, <8 x i16> %a0)
  ret <4 x i32> %1
}

define <8 x i32> @undef_pmaddwd_256(<16 x i16> %a0) {
; CHECK-LABEL: @undef_pmaddwd_256(
; CHECK-NEXT:    ret <8 x i32> zeroinitializer
;
  %1 = call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> %a0, <16 x i16> undef)
  ret <8 x i32> %1
}

define <8 x i32> @undef_pmaddwd_256_commute(<16 x i16> %a0) {
; CHECK-LABEL: @undef_pmaddwd_256_commute(
; CHECK-NEXT:    ret <8 x i32> zeroinitializer
;
  %1 = call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> undef, <16 x i16> %a0)
  ret <8 x i32> %1
}

define <16 x i32> @undef_pmaddwd_512(<32 x i16> %a0) {
; CHECK-LABEL: @undef_pmaddwd_512(
; CHECK-NEXT:    ret <16 x i32> zeroinitializer
;
  %1 = call <16 x i32> @llvm.x86.avx512.pmaddw.d.512(<32 x i16> %a0, <32 x i16> undef)
  ret <16 x i32> %1
}

define <16 x i32> @undef_pmaddwd_512_commute(<32 x i16> %a0) {
; CHECK-LABEL: @undef_pmaddwd_512_commute(
; CHECK-NEXT:    ret <16 x i32> zeroinitializer
;
  %1 = call <16 x i32> @llvm.x86.avx512.pmaddw.d.512(<32 x i16> undef, <32 x i16> %a0)
  ret <16 x i32> %1
}

;
; Zero Elts
;
; An all-zero operand on either side is expected to fold the call to
; zeroinitializer.

define <4 x i32> @zero_pmaddwd_128(<8 x i16> %a0) {
; CHECK-LABEL: @zero_pmaddwd_128(
; CHECK-NEXT:    ret <4 x i32> zeroinitializer
;
  %1 = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %a0, <8 x i16> zeroinitializer)
  ret <4 x i32> %1
}

define <4 x i32> @zero_pmaddwd_128_commute(<8 x i16> %a0) {
; CHECK-LABEL: @zero_pmaddwd_128_commute(
; CHECK-NEXT:    ret <4 x i32> zeroinitializer
;
  %1 = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> zeroinitializer, <8 x i16> %a0)
  ret <4 x i32> %1
}

define <8 x i32> @zero_pmaddwd_256(<16 x i16> %a0) {
; CHECK-LABEL: @zero_pmaddwd_256(
; CHECK-NEXT:    ret <8 x i32> zeroinitializer
;
  %1 = call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> %a0, <16 x i16> zeroinitializer)
  ret <8 x i32> %1
}

define <8 x i32> @zero_pmaddwd_256_commute(<16 x i16> %a0) {
; CHECK-LABEL: @zero_pmaddwd_256_commute(
; CHECK-NEXT:    ret <8 x i32> zeroinitializer
;
  %1 = call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> zeroinitializer, <16 x i16> %a0)
  ret <8 x i32> %1
}

define <16 x i32> @zero_pmaddwd_512(<32 x i16> %a0) {
; CHECK-LABEL: @zero_pmaddwd_512(
; CHECK-NEXT:    ret <16 x i32> zeroinitializer
;
  %1 = call <16 x i32> @llvm.x86.avx512.pmaddw.d.512(<32 x i16> %a0, <32 x i16> zeroinitializer)
  ret <16 x i32> %1
}

define <16 x i32> @zero_pmaddwd_512_commute(<32 x i16> %a0) {
; CHECK-LABEL: @zero_pmaddwd_512_commute(
; CHECK-NEXT:    ret <16 x i32> zeroinitializer
;
  %1 = call <16 x i32> @llvm.x86.avx512.pmaddw.d.512(<32 x i16> zeroinitializer, <32 x i16> %a0)
  ret <16 x i32> %1
}

;
; Constant Folding
;
; Both operands are fully-defined constants, so the call is expected to fold to
; a constant vector. Expected lanes follow a[2i]*b[2i] + a[2i+1]*b[2i+1].
; TODO(review): regenerate the CHECK lines with utils/update_test_checks.py
; after any change to the operand constants.

define <4 x i32> @fold_pmaddwd_128() {
; CHECK-LABEL: @fold_pmaddwd_128(
; CHECK-NEXT:    ret <4 x i32> <i32 8, i32 8, i32 -72, i32 2147418113>
;
  ; Last pair uses the i16 extremes: (-32768)^2 + 32767^2 = 2147418113, which still fits in i32.
  %1 = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> <i16 1, i16 2, i16 3, i16 4, i16 -5, i16 6, i16 -32768, i16 32767>, <8 x i16> <i16 2, i16 3, i16 -4, i16 5, i16 6, i16 -7, i16 -32768, i16 32767>)
  ret <4 x i32> %1
}

define <8 x i32> @fold_pmaddwd_256() {
; CHECK-LABEL: @fold_pmaddwd_256(
; CHECK-NEXT:    ret <8 x i32> <i32 -1, i32 -2, i32 -3, i32 -4, i32 1, i32 2, i32 3, i32 4>
;
  %1 = call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> <i16 1, i16 -2, i16 3, i16 -4, i16 5, i16 -6, i16 7, i16 -8, i16 9, i16 -10, i16 11, i16 -12, i16 13, i16 -14, i16 15, i16 -16>, <16 x i16> <i16 1, i16 1, i16 2, i16 2, i16 3, i16 3, i16 4, i16 4, i16 -1, i16 -1, i16 -2, i16 -2, i16 -3, i16 -3, i16 -4, i16 -4>)
  ret <8 x i32> %1
}

define <16 x i32> @fold_pmaddwd_512() {
; CHECK-LABEL: @fold_pmaddwd_512(
; CHECK-NEXT:    ret <16 x i32> <i32 -1, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29>
;
  ; a = 0..31 and b = (2, -1) repeated, so lane i = 2*(2i) - (2i+1) = 2i - 1.
  %1 = call <16 x i32> @llvm.x86.avx512.pmaddw.d.512(<32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16, i16 17, i16 18, i16 19, i16 20, i16 21, i16 22, i16 23, i16 24, i16 25, i16 26, i16 27, i16 28, i16 29, i16 30, i16 31>, <32 x i16> <i16 2, i16 -1, i16 2, i16 -1, i16 2, i16 -1, i16 2, i16 -1, i16 2, i16 -1, i16 2, i16 -1, i16 2, i16 -1, i16 2, i16 -1, i16 2, i16 -1, i16 2, i16 -1, i16 2, i16 -1, i16 2, i16 -1, i16 2, i16 -1, i16 2, i16 -1, i16 2, i16 -1, i16 2, i16 -1>)
  ret <16 x i32> %1
}

;
; Demanded Elts
;
; Only result lane 0 is used by the final splat shuffle, so only input lanes
; 0 and 1 of each operand are demanded. The input shuffles keep lanes 0 and 1
; in place and permute the rest, so they are expected to be removed and the
; call fed directly from %a0 / %a1.

define <4 x i32> @elts_pmaddwd_128(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: @elts_pmaddwd_128(
; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> [[A0:%.*]], <8 x i16> [[A1:%.*]])
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
;
  %1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2>
  %2 = shufflevector <8 x i16> %a1, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2>
  %3 = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %1, <8 x i16> %2)
  %4 = shufflevector <4 x i32> %3, <4 x i32> poison, <4 x i32> zeroinitializer
  ret <4 x i32> %4
}

define <8 x i32> @elts_pmaddwd_256(<16 x i16> %a0, <16 x i16> %a1) {
; CHECK-LABEL: @elts_pmaddwd_256(
; CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> [[A0:%.*]], <16 x i16> [[A1:%.*]])
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> poison, <8 x i32> zeroinitializer
; CHECK-NEXT:    ret <8 x i32> [[TMP2]]
;
  %1 = shufflevector <16 x i16> %a0, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2>
  %2 = shufflevector <16 x i16> %a1, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2>
  %3 = call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> %1, <16 x i16> %2)
  %4 = shufflevector <8 x i32> %3, <8 x i32> poison, <8 x i32> zeroinitializer
  ret <8 x i32> %4
}

define <16 x i32> @elts_pmaddwd_512(<32 x i16> %a0, <32 x i16> %a1) {
; CHECK-LABEL: @elts_pmaddwd_512(
; CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i32> @llvm.x86.avx512.pmaddw.d.512(<32 x i16> [[A0:%.*]], <32 x i16> [[A1:%.*]])
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <16 x i32> [[TMP1]], <16 x i32> poison, <16 x i32> zeroinitializer
; CHECK-NEXT:    ret <16 x i32> [[TMP2]]
;
  %1 = shufflevector <32 x i16> %a0, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2>
  %2 = shufflevector <32 x i16> %a1, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2>
  %3 = call <16 x i32> @llvm.x86.avx512.pmaddw.d.512(<32 x i16> %1, <32 x i16> %2)
  %4 = shufflevector <16 x i32> %3, <16 x i32> poison, <16 x i32> zeroinitializer
  ret <16 x i32> %4
}