diff options
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 6 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/insertelement-ones.ll | 15 |
2 files changed, 11 insertions, 10 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 912683f..289c285 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -19789,9 +19789,11 @@ SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, bool IsAllOnesElt = VT.isInteger() && llvm::isAllOnesConstant(N1); if (IsZeroElt || IsAllOnesElt) { - // Lower insertion of i8 -1 as an 'OR' blend. + // Lower insertion of v16i8/v32i8/v64i16 -1 elts as an 'OR' blend. // We don't deal with i8 0 since it appears to be handled elsewhere. - if (IsAllOnesElt && EltSizeInBits == 8 && !Subtarget.hasSSE41()) { + if (IsAllOnesElt && + ((VT == MVT::v16i8 && !Subtarget.hasSSE41()) || + ((VT == MVT::v32i8 || VT == MVT::v16i16) && !Subtarget.hasInt256()))) { SDValue ZeroCst = DAG.getConstant(0, dl, VT.getScalarType()); SDValue OnesCst = DAG.getAllOnesConstant(dl, VT.getScalarType()); SmallVector<SDValue, 8> CstVectorElts(NumElts, ZeroCst); diff --git a/llvm/test/CodeGen/X86/insertelement-ones.ll b/llvm/test/CodeGen/X86/insertelement-ones.ll index 577b662..5470fae 100644 --- a/llvm/test/CodeGen/X86/insertelement-ones.ll +++ b/llvm/test/CodeGen/X86/insertelement-ones.ll @@ -280,7 +280,8 @@ define <16 x i16> @insert_v16i16_x12345x789ABCDEx(<16 x i16> %a) { ; ; AVX1-LABEL: insert_v16i16_x12345x789ABCDEx: ; AVX1: # %bb.0: -; AVX1-NEXT: vorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; AVX1-NEXT: vmovaps {{.*#+}} xmm1 = [65535,0,0,0] +; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0 ; AVX1-NEXT: vorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 ; AVX1-NEXT: vorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 ; AVX1-NEXT: retq @@ -384,13 +385,11 @@ define <32 x i8> @insert_v32i8_x123456789ABCDEzGHIJKLMNOPQRSTxx(<32 x i8> %a) { ; ; AVX1-LABEL: insert_v32i8_x123456789ABCDEzGHIJKLMNOPQRSTxx: ; AVX1: # %bb.0: -; AVX1-NEXT: movl $255, %eax -; AVX1-NEXT: vpinsrb $0, %eax, %xmm0, %xmm1 -; AVX1-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1 -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 -; AVX1-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 -; AVX1-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 -; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; AVX1-NEXT: vmovaps {{.*#+}} xmm1 = [255,0,0,0] +; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0 +; AVX1-NEXT: vorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; AVX1-NEXT: vorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; AVX1-NEXT: vorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 ; AVX1-NEXT: retq ; ; AVX2-LABEL: insert_v32i8_x123456789ABCDEzGHIJKLMNOPQRSTxx: |