; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=instcombine -mtriple=x86_64-unknown-unknown -S | FileCheck %s

; InstCombine tests for the X86 PMADDUBSW intrinsics (128/256/512-bit forms):
; folds for undef and zero operands, constant folding, and demanded-elements
; simplification of the intrinsic's operands.
;
; NOTE(review): the constant operands of the fold_* tests and the masks of the
; input shuffles in the elts_* tests were missing from the copy of this file
; that was repaired here. They have been reconstructed, and the fold_* CHECK
; constants were computed by hand as
;   sadd.sat(zext(a[2i]) * sext(b[2i]), zext(a[2i+1]) * sext(b[2i+1]))
; Re-run utils/update_test_checks.py to confirm the expected output.

;
; UNDEF Elts
;

; An undef operand on either side is expected to fold the call to zero.

define <8 x i16> @undef_pmaddubsw_128(<16 x i8> %a0) {
; CHECK-LABEL: @undef_pmaddubsw_128(
; CHECK-NEXT:    ret <8 x i16> zeroinitializer
;
  %1 = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8> %a0, <16 x i8> undef)
  ret <8 x i16> %1
}

define <8 x i16> @undef_pmaddubsw_128_commute(<16 x i8> %a0) {
; CHECK-LABEL: @undef_pmaddubsw_128_commute(
; CHECK-NEXT:    ret <8 x i16> zeroinitializer
;
  %1 = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8> undef, <16 x i8> %a0)
  ret <8 x i16> %1
}

define <16 x i16> @undef_pmaddubsw_256(<32 x i8> %a0) {
; CHECK-LABEL: @undef_pmaddubsw_256(
; CHECK-NEXT:    ret <16 x i16> zeroinitializer
;
  %1 = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> %a0, <32 x i8> undef)
  ret <16 x i16> %1
}

define <16 x i16> @undef_pmaddubsw_256_commute(<32 x i8> %a0) {
; CHECK-LABEL: @undef_pmaddubsw_256_commute(
; CHECK-NEXT:    ret <16 x i16> zeroinitializer
;
  %1 = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> undef, <32 x i8> %a0)
  ret <16 x i16> %1
}

define <32 x i16> @undef_pmaddubsw_512(<64 x i8> %a0) {
; CHECK-LABEL: @undef_pmaddubsw_512(
; CHECK-NEXT:    ret <32 x i16> zeroinitializer
;
  %1 = call <32 x i16> @llvm.x86.avx512.pmaddubs.w.512(<64 x i8> %a0, <64 x i8> undef)
  ret <32 x i16> %1
}

define <32 x i16> @undef_pmaddubsw_512_commute(<64 x i8> %a0) {
; CHECK-LABEL: @undef_pmaddubsw_512_commute(
; CHECK-NEXT:    ret <32 x i16> zeroinitializer
;
  %1 = call <32 x i16> @llvm.x86.avx512.pmaddubs.w.512(<64 x i8> undef, <64 x i8> %a0)
  ret <32 x i16> %1
}

;
; Zero Elts
;

; An all-zero operand on either side is expected to fold the call to zero.

define <8 x i16> @zero_pmaddubsw_128(<16 x i8> %a0) {
; CHECK-LABEL: @zero_pmaddubsw_128(
; CHECK-NEXT:    ret <8 x i16> zeroinitializer
;
  %1 = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8> %a0, <16 x i8> zeroinitializer)
  ret <8 x i16> %1
}

define <8 x i16> @zero_pmaddubsw_128_commute(<16 x i8> %a0) {
; CHECK-LABEL: @zero_pmaddubsw_128_commute(
; CHECK-NEXT:    ret <8 x i16> zeroinitializer
;
  %1 = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8> zeroinitializer, <16 x i8> %a0)
  ret <8 x i16> %1
}

define <16 x i16> @zero_pmaddubsw_256(<32 x i8> %a0) {
; CHECK-LABEL: @zero_pmaddubsw_256(
; CHECK-NEXT:    ret <16 x i16> zeroinitializer
;
  %1 = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> %a0, <32 x i8> zeroinitializer)
  ret <16 x i16> %1
}

define <16 x i16> @zero_pmaddubsw_256_commute(<32 x i8> %a0) {
; CHECK-LABEL: @zero_pmaddubsw_256_commute(
; CHECK-NEXT:    ret <16 x i16> zeroinitializer
;
  %1 = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> zeroinitializer, <32 x i8> %a0)
  ret <16 x i16> %1
}

define <32 x i16> @zero_pmaddubsw_512(<64 x i8> %a0) {
; CHECK-LABEL: @zero_pmaddubsw_512(
; CHECK-NEXT:    ret <32 x i16> zeroinitializer
;
  %1 = call <32 x i16> @llvm.x86.avx512.pmaddubs.w.512(<64 x i8> %a0, <64 x i8> zeroinitializer)
  ret <32 x i16> %1
}

define <32 x i16> @zero_pmaddubsw_512_commute(<64 x i8> %a0) {
; CHECK-LABEL: @zero_pmaddubsw_512_commute(
; CHECK-NEXT:    ret <32 x i16> zeroinitializer
;
  %1 = call <32 x i16> @llvm.x86.avx512.pmaddubs.w.512(<64 x i8> zeroinitializer, <64 x i8> %a0)
  ret <32 x i16> %1
}

;
; Constant Folding
;

; Both operands are constants. The leading pairs give small positive sums, the
; middle pairs give negative sums (second operand negative), and the last two
; pairs use 255 (i8 -1 read as unsigned) against -128 and 127 so that the
; pairwise sum saturates to -32768 and 32767 respectively.

define <8 x i16> @fold_pmaddubsw_128() {
; CHECK-LABEL: @fold_pmaddubsw_128(
; CHECK-NEXT:    ret <8 x i16> <i16 1, i16 13, i16 41, i16 85, i16 -145, i16 -221, i16 -32768, i16 32767>
;
  %1 = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 -1, i8 -1, i8 -1, i8 -1>, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 -8, i8 -9, i8 -10, i8 -11, i8 -128, i8 -128, i8 127, i8 127>)
  ret <8 x i16> %1
}

define <16 x i16> @fold_pmaddubsw_256() {
; CHECK-LABEL: @fold_pmaddubsw_256(
; CHECK-NEXT:    ret <16 x i16> <i16 1, i16 13, i16 41, i16 85, i16 145, i16 221, i16 313, i16 421, i16 -545, i16 -685, i16 -841, i16 -1013, i16 -1201, i16 -1405, i16 -32768, i16 32767>
;
  %1 = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 -1, i8 -1, i8 -1, i8 -1>, <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 -16, i8 -17, i8 -18, i8 -19, i8 -20, i8 -21, i8 -22, i8 -23, i8 -24, i8 -25, i8 -26, i8 -27, i8 -128, i8 -128, i8 127, i8 127>)
  ret <16 x i16> %1
}

define <32 x i16> @fold_pmaddubsw_512() {
; CHECK-LABEL: @fold_pmaddubsw_512(
; CHECK-NEXT:    ret <32 x i16> <i16 1, i16 13, i16 41, i16 85, i16 145, i16 221, i16 313, i16 421, i16 545, i16 685, i16 841, i16 1013, i16 1201, i16 1405, i16 1625, i16 1861, i16 -2113, i16 -2381, i16 -2665, i16 -2965, i16 -3281, i16 -3613, i16 -3961, i16 -4325, i16 -4705, i16 -5101, i16 -5513, i16 -5941, i16 -6385, i16 -6845, i16 -32768, i16 32767>
;
  %1 = call <32 x i16> @llvm.x86.avx512.pmaddubs.w.512(<64 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31, i8 32, i8 33, i8 34, i8 35, i8 36, i8 37, i8 38, i8 39, i8 40, i8 41, i8 42, i8 43, i8 44, i8 45, i8 46, i8 47, i8 48, i8 49, i8 50, i8 51, i8 52, i8 53, i8 54, i8 55, i8 56, i8 57, i8 58, i8 59, i8 -1, i8 -1, i8 -1, i8 -1>, <64 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31, i8 -32, i8 -33, i8 -34, i8 -35, i8 -36, i8 -37, i8 -38, i8 -39, i8 -40, i8 -41, i8 -42, i8 -43, i8 -44, i8 -45, i8 -46, i8 -47, i8 -48, i8 -49, i8 -50, i8 -51, i8 -52, i8 -53, i8 -54, i8 -55, i8 -56, i8 -57, i8 -58, i8 -59, i8 -128, i8 -128, i8 127, i8 127>)
  ret <32 x i16> %1
}

;
; Demanded Elts
;

; Only result element 0 is used (it is splatted by the final shuffle). The
; input shuffles keep lanes 0 and 1 in place and swap every later pair, so the
; CHECK lines expect both input shuffles to disappear and the call to take
; %a0 / %a1 directly.

define <8 x i16> @elts_pmaddubsw_128(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: @elts_pmaddubsw_128(
; CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8> [[A0:%.*]], <16 x i8> [[A1:%.*]])
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> zeroinitializer
; CHECK-NEXT:    ret <8 x i16> [[TMP2]]
;
  %1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
  %2 = shufflevector <16 x i8> %a1, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
  %3 = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8> %1, <16 x i8> %2)
  %4 = shufflevector <8 x i16> %3, <8 x i16> poison, <8 x i32> zeroinitializer
  ret <8 x i16> %4
}

define <16 x i16> @elts_pmaddubsw_256(<32 x i8> %a0, <32 x i8> %a1) {
; CHECK-LABEL: @elts_pmaddubsw_256(
; CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> [[A0:%.*]], <32 x i8> [[A1:%.*]])
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <16 x i16> [[TMP1]], <16 x i16> poison, <16 x i32> zeroinitializer
; CHECK-NEXT:    ret <16 x i16> [[TMP2]]
;
  %1 = shufflevector <32 x i8> %a0, <32 x i8> undef, <32 x i32> <i32 0, i32 1, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14, i32 17, i32 16, i32 19, i32 18, i32 21, i32 20, i32 23, i32 22, i32 25, i32 24, i32 27, i32 26, i32 29, i32 28, i32 31, i32 30>
  %2 = shufflevector <32 x i8> %a1, <32 x i8> undef, <32 x i32> <i32 0, i32 1, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14, i32 17, i32 16, i32 19, i32 18, i32 21, i32 20, i32 23, i32 22, i32 25, i32 24, i32 27, i32 26, i32 29, i32 28, i32 31, i32 30>
  %3 = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> %1, <32 x i8> %2)
  %4 = shufflevector <16 x i16> %3, <16 x i16> poison, <16 x i32> zeroinitializer
  ret <16 x i16> %4
}

define <32 x i16> @elts_pmaddubsw_512(<64 x i8> %a0, <64 x i8> %a1) {
; CHECK-LABEL: @elts_pmaddubsw_512(
; CHECK-NEXT:    [[TMP1:%.*]] = call <32 x i16> @llvm.x86.avx512.pmaddubs.w.512(<64 x i8> [[A0:%.*]], <64 x i8> [[A1:%.*]])
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <32 x i16> [[TMP1]], <32 x i16> poison, <32 x i32> zeroinitializer
; CHECK-NEXT:    ret <32 x i16> [[TMP2]]
;
  %1 = shufflevector <64 x i8> %a0, <64 x i8> undef, <64 x i32> <i32 0, i32 1, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14, i32 17, i32 16, i32 19, i32 18, i32 21, i32 20, i32 23, i32 22, i32 25, i32 24, i32 27, i32 26, i32 29, i32 28, i32 31, i32 30, i32 33, i32 32, i32 35, i32 34, i32 37, i32 36, i32 39, i32 38, i32 41, i32 40, i32 43, i32 42, i32 45, i32 44, i32 47, i32 46, i32 49, i32 48, i32 51, i32 50, i32 53, i32 52, i32 55, i32 54, i32 57, i32 56, i32 59, i32 58, i32 61, i32 60, i32 63, i32 62>
  %2 = shufflevector <64 x i8> %a1, <64 x i8> undef, <64 x i32> <i32 0, i32 1, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14, i32 17, i32 16, i32 19, i32 18, i32 21, i32 20, i32 23, i32 22, i32 25, i32 24, i32 27, i32 26, i32 29, i32 28, i32 31, i32 30, i32 33, i32 32, i32 35, i32 34, i32 37, i32 36, i32 39, i32 38, i32 41, i32 40, i32 43, i32 42, i32 45, i32 44, i32 47, i32 46, i32 49, i32 48, i32 51, i32 50, i32 53, i32 52, i32 55, i32 54, i32 57, i32 56, i32 59, i32 58, i32 61, i32 60, i32 63, i32 62>
  %3 = call <32 x i16> @llvm.x86.avx512.pmaddubs.w.512(<64 x i8> %1, <64 x i8> %2)
  %4 = shufflevector <32 x i16> %3, <32 x i16> poison, <32 x i32> zeroinitializer
  ret <32 x i16> %4
}