diff options
author | Alexey Bataev <a.bataev@outlook.com> | 2024-12-06 12:27:00 -0500 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-12-06 12:27:00 -0500 |
commit | b9aa155d26935c058449f4ac116201de000fd7bf (patch) | |
tree | cd257d3d2f4e6f845bc1bc3e1b3a4c7f78de7298 /llvm/lib/Analysis/VectorUtils.cpp | |
parent | 2e33ed9ecc52fcec27eac2efb2615d1efcf6fd32 (diff) | |
download | llvm-b9aa155d26935c058449f4ac116201de000fd7bf.zip llvm-b9aa155d26935c058449f4ac116201de000fd7bf.tar.gz llvm-b9aa155d26935c058449f4ac116201de000fd7bf.tar.bz2 |
[TTI][X86] Fix detection of shuffles that use only the second shuffle operand
If the shuffle mask uses only indices from the second shuffle operand,
the processShuffleMasks function currently misses it, which prevents correct
cost estimation in this corner case. To fix this, we need to raise the index
limit to 2 * VF rather than just VF and adjust the processing
accordingly. This will also allow future improvements for two-source
permutations.
Reviewers: RKSimon
Reviewed By: RKSimon
Pull Request: https://github.com/llvm/llvm-project/pull/118972
Diffstat (limited to 'llvm/lib/Analysis/VectorUtils.cpp')
-rw-r--r-- | llvm/lib/Analysis/VectorUtils.cpp | 13 |
1 files changed, 7 insertions, 6 deletions
diff --git a/llvm/lib/Analysis/VectorUtils.cpp b/llvm/lib/Analysis/VectorUtils.cpp index 989090b..5f7aa53 100644 --- a/llvm/lib/Analysis/VectorUtils.cpp +++ b/llvm/lib/Analysis/VectorUtils.cpp @@ -504,25 +504,26 @@ void llvm::processShuffleMasks( unsigned SzSrc = Sz / NumOfSrcRegs; for (unsigned I = 0; I < NumOfDestRegs; ++I) { auto &RegMasks = Res[I]; - RegMasks.assign(NumOfSrcRegs, {}); + RegMasks.assign(2 * NumOfSrcRegs, {}); // Check that the values in dest registers are in the one src // register. for (unsigned K = 0; K < SzDest; ++K) { int Idx = I * SzDest + K; if (Idx == Sz) break; - if (Mask[Idx] >= Sz || Mask[Idx] == PoisonMaskElem) + if (Mask[Idx] >= 2 * Sz || Mask[Idx] == PoisonMaskElem) continue; - int SrcRegIdx = Mask[Idx] / SzSrc; + int MaskIdx = Mask[Idx] % Sz; + int SrcRegIdx = MaskIdx / SzSrc + (Mask[Idx] >= Sz ? NumOfSrcRegs : 0); // Add a cost of PermuteTwoSrc for each new source register permute, // if we have more than one source registers. if (RegMasks[SrcRegIdx].empty()) RegMasks[SrcRegIdx].assign(SzDest, PoisonMaskElem); - RegMasks[SrcRegIdx][K] = Mask[Idx] % SzSrc; + RegMasks[SrcRegIdx][K] = MaskIdx % SzSrc; } } // Process split mask. - for (unsigned I = 0; I < NumOfUsedRegs; ++I) { + for (unsigned I : seq<unsigned>(NumOfUsedRegs)) { auto &Dest = Res[I]; int NumSrcRegs = count_if(Dest, [](ArrayRef<int> Mask) { return !Mask.empty(); }); @@ -567,7 +568,7 @@ void llvm::processShuffleMasks( int FirstIdx = -1; SecondIdx = -1; MutableArrayRef<int> FirstMask, SecondMask; - for (unsigned I = 0; I < NumOfDestRegs; ++I) { + for (unsigned I : seq<unsigned>(2 * NumOfSrcRegs)) { SmallVectorImpl<int> &RegMask = Dest[I]; if (RegMask.empty()) continue; |