diff options
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 7 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/pmulh.ll | 31 |
2 files changed, 9 insertions, 29 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 65dc107..d9eedfd 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -53022,9 +53022,10 @@ static SDValue combinePMULH(SDValue Src, EVT VT, const SDLoc &DL, // Check if both inputs are extensions, which will be removed by truncation. auto isOpTruncateFree = [](SDValue Op) { - return (Op.getOpcode() == ISD::SIGN_EXTEND || - Op.getOpcode() == ISD::ZERO_EXTEND) && - Op.getOperand(0).getScalarValueSizeInBits() <= 16; + if (Op.getOpcode() == ISD::SIGN_EXTEND || + Op.getOpcode() == ISD::ZERO_EXTEND) + return Op.getOperand(0).getScalarValueSizeInBits() <= 16; + return ISD::isBuildVectorOfConstantSDNodes(Op.getNode()); }; bool IsTruncateFree = isOpTruncateFree(LHS) && isOpTruncateFree(RHS); diff --git a/llvm/test/CodeGen/X86/pmulh.ll b/llvm/test/CodeGen/X86/pmulh.ll index 65337b8..502249a 100644 --- a/llvm/test/CodeGen/X86/pmulh.ll +++ b/llvm/test/CodeGen/X86/pmulh.ll @@ -974,32 +974,11 @@ define <16 x i16> @zext_mulhuw_v16i16_positive_constant(<16 x i16> %a) { ; SSE-NEXT: pmulhw %xmm2, %xmm1 ; SSE-NEXT: retq ; -; AVX2-LABEL: zext_mulhuw_v16i16_positive_constant: -; AVX2: # %bb.0: -; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 -; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero -; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0 -; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero -; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [1000,0,1000,0,1000,0,1000,0,1000,0,1000,0,1000,0,1000,0] -; AVX2-NEXT: vpmulhuw %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpmulhuw %ymm2, %ymm1, %ymm1 -; AVX2-NEXT: vpackusdw %ymm0, %ymm1, %ymm0 -; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] -; AVX2-NEXT: retq -; -; AVX512F-LABEL: zext_mulhuw_v16i16_positive_constant: -; AVX512F: # %bb.0: -; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 -; AVX512F-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [1000,1000,1000,1000,1000,1000,1000,1000,1000,1000,1000,1000,1000,1000,1000,1000] -; AVX512F-NEXT: retq -; -; AVX512BW-LABEL: zext_mulhuw_v16i16_positive_constant: -; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 -; AVX512BW-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero -; AVX512BW-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0 # [1000,0,1000,0,1000,0,1000,0,1000,0,1000,0,1000,0,1000,0,1000,0,1000,0,1000,0,1000,0,1000,0,1000,0,1000,0,1000,0] -; AVX512BW-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512BW-NEXT: retq +; AVX-LABEL: zext_mulhuw_v16i16_positive_constant: +; AVX: # %bb.0: +; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; AVX-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [1000,1000,1000,1000,1000,1000,1000,1000,1000,1000,1000,1000,1000,1000,1000,1000] +; AVX-NEXT: retq %k = and <16 x i16> %a, <i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767> %x = zext nneg <16 x i16> %k to <16 x i32> %m = mul nuw nsw <16 x i32> %x, <i32 1000, i32 1000, i32 1000, i32 1000, i32 1000, i32 1000, i32 1000, i32 1000, i32 1000, i32 1000, i32 1000, i32 1000, i32 1000, i32 1000, i32 1000, i32 1000> |