aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Simon Pilgrim <llvm-dev@redking.me.uk> 2025-03-17 17:32:03 +0000
committer Simon Pilgrim <llvm-dev@redking.me.uk> 2025-03-17 19:15:37 +0000
commit 20cdffbd2761b2cb33e0968a78de9a427d0ef555 (patch)
tree 9a0fe11d7060c3ab5222f624cbe5264dea462b76
parent 24e88b0e6bc04f16d7353ad9ef07398836adf244 (diff)
download llvm-20cdffbd2761b2cb33e0968a78de9a427d0ef555.zip
llvm-20cdffbd2761b2cb33e0968a78de9a427d0ef555.tar.gz
llvm-20cdffbd2761b2cb33e0968a78de9a427d0ef555.tar.bz2
[X86] combineConcatVectorOps - extend VPERMILPD handling to support 512-bit types
-rw-r--r-- llvm/lib/Target/X86/X86ISelLowering.cpp 13
-rw-r--r-- llvm/test/CodeGen/X86/vector-shuffle-512-v8.ll 10
2 files changed, 12 insertions, 11 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index d3e6bff..ee2c72b 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -58113,11 +58113,14 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
DAG.getNode(X86ISD::VPERMILPI, DL, FloatVT, Res, Op0.getOperand(1));
return DAG.getBitcast(VT, Res);
}
- // TODO: v8f64 VPERMILPI concatenation.
- if (!IsSplat && NumOps == 2 && VT == MVT::v4f64) {
- uint64_t Idx0 = Ops[0].getConstantOperandVal(1);
- uint64_t Idx1 = Ops[1].getConstantOperandVal(1);
- uint64_t Idx = ((Idx1 & 3) << 2) | (Idx0 & 3);
+ if (!IsSplat && (VT == MVT::v4f64 || VT == MVT::v8f64)) {
+ unsigned NumSubElts = Op0.getValueType().getVectorNumElements();
+ uint64_t Mask = (1ULL << NumSubElts) - 1;
+ uint64_t Idx = 0;
+ for (unsigned I = 0; I != NumOps; ++I) {
+ uint64_t SubIdx = Ops[I].getConstantOperandVal(1);
+ Idx |= (SubIdx & Mask) << (I * NumSubElts);
+ }
return DAG.getNode(X86ISD::VPERMILPI, DL, VT,
ConcatSubOperand(VT, Ops, 0),
DAG.getTargetConstant(Idx, DL, MVT::i8));
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-512-v8.ll b/llvm/test/CodeGen/X86/vector-shuffle-512-v8.ll
index 37b2d13..fce98cd 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-512-v8.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-512-v8.ll
@@ -1678,11 +1678,10 @@ define <8 x double> @concat_shuffle_v8f64_v2f64_10325476(<2 x double> %a0, <2 x
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $xmm2 killed $xmm2 def $ymm2
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
+; AVX512F-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
; AVX512F-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX512F-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
-; AVX512F-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm1
-; AVX512F-NEXT: vshufpd {{.*#+}} ymm1 = ymm1[1,0,3,2]
-; AVX512F-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
+; AVX512F-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
+; AVX512F-NEXT: vshufpd {{.*#+}} zmm0 = zmm0[1,0,3,2,5,4,7,6]
; AVX512F-NEXT: retq
;
; AVX512F-32-LABEL: concat_shuffle_v8f64_v2f64_10325476:
@@ -1691,10 +1690,9 @@ define <8 x double> @concat_shuffle_v8f64_v2f64_10325476(<2 x double> %a0, <2 x
; AVX512F-32-NEXT: # kill: def $xmm2 killed $xmm2 def $ymm2
; AVX512F-32-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX512F-32-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX512F-32-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; AVX512F-32-NEXT: vinsertf128 $1, {{[0-9]+}}(%esp), %ymm2, %ymm1
-; AVX512F-32-NEXT: vshufpd {{.*#+}} ymm1 = ymm1[1,0,3,2]
; AVX512F-32-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
+; AVX512F-32-NEXT: vshufpd {{.*#+}} zmm0 = zmm0[1,0,3,2,5,4,7,6]
; AVX512F-32-NEXT: addl $12, %esp
; AVX512F-32-NEXT: retl
%s0 = shufflevector <2 x double> %a0, <2 x double> poison, <2 x i32> <i32 1, i32 0>