diff options
| author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2025-03-17 17:32:03 +0000 |
|---|---|---|
| committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2025-03-17 19:15:37 +0000 |
| commit | 20cdffbd2761b2cb33e0968a78de9a427d0ef555 (patch) | |
| tree | 9a0fe11d7060c3ab5222f624cbe5264dea462b76 | |
| parent | 24e88b0e6bc04f16d7353ad9ef07398836adf244 (diff) | |
| download | llvm-20cdffbd2761b2cb33e0968a78de9a427d0ef555.zip llvm-20cdffbd2761b2cb33e0968a78de9a427d0ef555.tar.gz llvm-20cdffbd2761b2cb33e0968a78de9a427d0ef555.tar.bz2 | |
[X86] combineConcatVectorOps - extend VPERMILPD handling to support 512-bit types
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 13 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/vector-shuffle-512-v8.ll | 10 |
2 files changed, 12 insertions, 11 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index d3e6bff..ee2c72b 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -58113,11 +58113,14 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
             DAG.getNode(X86ISD::VPERMILPI, DL, FloatVT, Res, Op0.getOperand(1));
         return DAG.getBitcast(VT, Res);
       }
-      // TODO: v8f64 VPERMILPI concatenation.
-      if (!IsSplat && NumOps == 2 && VT == MVT::v4f64) {
-        uint64_t Idx0 = Ops[0].getConstantOperandVal(1);
-        uint64_t Idx1 = Ops[1].getConstantOperandVal(1);
-        uint64_t Idx = ((Idx1 & 3) << 2) | (Idx0 & 3);
+      if (!IsSplat && (VT == MVT::v4f64 || VT == MVT::v8f64)) {
+        unsigned NumSubElts = Op0.getValueType().getVectorNumElements();
+        uint64_t Mask = (1ULL << NumSubElts) - 1;
+        uint64_t Idx = 0;
+        for (unsigned I = 0; I != NumOps; ++I) {
+          uint64_t SubIdx = Ops[I].getConstantOperandVal(1);
+          Idx |= (SubIdx & Mask) << (I * NumSubElts);
+        }
         return DAG.getNode(X86ISD::VPERMILPI, DL, VT,
                            ConcatSubOperand(VT, Ops, 0),
                            DAG.getTargetConstant(Idx, DL, MVT::i8));
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-512-v8.ll b/llvm/test/CodeGen/X86/vector-shuffle-512-v8.ll
index 37b2d13..fce98cd 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-512-v8.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-512-v8.ll
@@ -1678,11 +1678,10 @@ define <8 x double> @concat_shuffle_v8f64_v2f64_10325476(<2 x double> %a0, <2 x
 ; AVX512F: # %bb.0:
 ; AVX512F-NEXT: # kill: def $xmm2 killed $xmm2 def $ymm2
 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
+; AVX512F-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
 ; AVX512F-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX512F-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
-; AVX512F-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm1
-; AVX512F-NEXT: vshufpd {{.*#+}} ymm1 = ymm1[1,0,3,2]
-; AVX512F-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
+; AVX512F-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
+; AVX512F-NEXT: vshufpd {{.*#+}} zmm0 = zmm0[1,0,3,2,5,4,7,6]
 ; AVX512F-NEXT: retq
 ;
 ; AVX512F-32-LABEL: concat_shuffle_v8f64_v2f64_10325476:
@@ -1691,10 +1690,9 @@ define <8 x double> @concat_shuffle_v8f64_v2f64_10325476(<2 x double> %a0, <2 x
 ; AVX512F-32-NEXT: # kill: def $xmm2 killed $xmm2 def $ymm2
 ; AVX512F-32-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
 ; AVX512F-32-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX512F-32-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
 ; AVX512F-32-NEXT: vinsertf128 $1, {{[0-9]+}}(%esp), %ymm2, %ymm1
-; AVX512F-32-NEXT: vshufpd {{.*#+}} ymm1 = ymm1[1,0,3,2]
 ; AVX512F-32-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
+; AVX512F-32-NEXT: vshufpd {{.*#+}} zmm0 = zmm0[1,0,3,2,5,4,7,6]
 ; AVX512F-32-NEXT: addl $12, %esp
 ; AVX512F-32-NEXT: retl
   %s0 = shufflevector <2 x double> %a0, <2 x double> poison, <2 x i32> <i32 1, i32 0>
