diff options
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 14 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/avx-insertelt.ll | 4 |
2 files changed, 12 insertions, 6 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 88004c8..cb6a87a 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -47269,12 +47269,18 @@ static SDValue combineVectorInsert(SDNode *N, SelectionDAG &DAG,
                                    TargetLowering::DAGCombinerInfo &DCI,
                                    const X86Subtarget &Subtarget) {
   EVT VT = N->getValueType(0);
-  assert(((N->getOpcode() == X86ISD::PINSRB && VT == MVT::v16i8) ||
-          (N->getOpcode() == X86ISD::PINSRW && VT == MVT::v8i16) ||
-          N->getOpcode() == ISD::INSERT_VECTOR_ELT) &&
+  unsigned Opcode = N->getOpcode();
+  assert(((Opcode == X86ISD::PINSRB && VT == MVT::v16i8) ||
+          (Opcode == X86ISD::PINSRW && VT == MVT::v8i16) ||
+          Opcode == ISD::INSERT_VECTOR_ELT) &&
          "Unexpected vector insertion");
 
-  if (N->getOpcode() == X86ISD::PINSRB || N->getOpcode() == X86ISD::PINSRW) {
+  // Fold insert_vector_elt(undef, elt, 0) --> scalar_to_vector(elt).
+  if (Opcode == ISD::INSERT_VECTOR_ELT && N->getOperand(0).isUndef() &&
+      isNullConstant(N->getOperand(2)))
+    return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, N->getOperand(1));
+
+  if (Opcode == X86ISD::PINSRB || Opcode == X86ISD::PINSRW) {
     unsigned NumBitsPerElt = VT.getScalarSizeInBits();
     const TargetLowering &TLI = DAG.getTargetLoweringInfo();
     if (TLI.SimplifyDemandedBits(SDValue(N, 0),
diff --git a/llvm/test/CodeGen/X86/avx-insertelt.ll b/llvm/test/CodeGen/X86/avx-insertelt.ll
index 23b1a03..d3ac3f1 100644
--- a/llvm/test/CodeGen/X86/avx-insertelt.ll
+++ b/llvm/test/CodeGen/X86/avx-insertelt.ll
@@ -422,7 +422,7 @@ define <8 x i32> @insert_i32_two_elts_of_high_subvector(<8 x i32> %x, i32 %s) {
 define <4 x i64> @insert_i64_two_elts_of_high_subvector(<4 x i64> %x, i64 %s) {
 ; AVX-LABEL: insert_i64_two_elts_of_high_subvector:
 ; AVX: # %bb.0:
-; AVX-NEXT: vpinsrq $0, %rdi, %xmm0, %xmm1
+; AVX-NEXT: vmovq %rdi, %xmm1
 ; AVX-NEXT: vpinsrq $1, %rdi, %xmm1, %xmm1
 ; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX-NEXT: retq
@@ -523,7 +523,7 @@ define <8 x i32> @insert_i32_two_elts_of_low_subvector(<8 x i32> %x, i32 %s) {
 define <4 x i64> @insert_i64_two_elts_of_low_subvector(<4 x i64> %x, i64 %s) {
 ; AVX-LABEL: insert_i64_two_elts_of_low_subvector:
 ; AVX: # %bb.0:
-; AVX-NEXT: vpinsrq $0, %rdi, %xmm0, %xmm1
+; AVX-NEXT: vmovq %rdi, %xmm1
 ; AVX-NEXT: vpinsrq $1, %rdi, %xmm1, %xmm1
 ; AVX-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
 ; AVX-NEXT: retq