aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Analysis/VectorUtils.cpp
diff options
context:
space:
mode:
author Alexey Bataev <a.bataev@outlook.com> 2024-12-06 12:27:00 -0500
committer GitHub <noreply@github.com> 2024-12-06 12:27:00 -0500
commit b9aa155d26935c058449f4ac116201de000fd7bf (patch)
tree cd257d3d2f4e6f845bc1bc3e1b3a4c7f78de7298 /llvm/lib/Analysis/VectorUtils.cpp
parent 2e33ed9ecc52fcec27eac2efb2615d1efcf6fd32 (diff)
download llvm-b9aa155d26935c058449f4ac116201de000fd7bf.zip
llvm-b9aa155d26935c058449f4ac116201de000fd7bf.tar.gz
llvm-b9aa155d26935c058449f4ac116201de000fd7bf.tar.bz2
[TTI][X86] Fix detection of the shuffles from the second shuffle operand only
If the shuffle mask uses only indices from the second shuffle operand, the processShuffleMasks function currently misses it, which prevents correct cost estimation in this corner case. To fix this, we need to raise the limit to 2 * VF rather than just VF and adjust the processing correspondingly. This will also allow future improvements for two-source permutations. Reviewers: RKSimon Reviewed By: RKSimon Pull Request: https://github.com/llvm/llvm-project/pull/118972
Diffstat (limited to 'llvm/lib/Analysis/VectorUtils.cpp')
-rw-r--r-- llvm/lib/Analysis/VectorUtils.cpp 13
1 files changed, 7 insertions, 6 deletions
diff --git a/llvm/lib/Analysis/VectorUtils.cpp b/llvm/lib/Analysis/VectorUtils.cpp
index 989090b..5f7aa53 100644
--- a/llvm/lib/Analysis/VectorUtils.cpp
+++ b/llvm/lib/Analysis/VectorUtils.cpp
@@ -504,25 +504,26 @@ void llvm::processShuffleMasks(
unsigned SzSrc = Sz / NumOfSrcRegs;
for (unsigned I = 0; I < NumOfDestRegs; ++I) {
auto &RegMasks = Res[I];
- RegMasks.assign(NumOfSrcRegs, {});
+ RegMasks.assign(2 * NumOfSrcRegs, {});
// Check that the values in dest registers are in the one src
// register.
for (unsigned K = 0; K < SzDest; ++K) {
int Idx = I * SzDest + K;
if (Idx == Sz)
break;
- if (Mask[Idx] >= Sz || Mask[Idx] == PoisonMaskElem)
+ if (Mask[Idx] >= 2 * Sz || Mask[Idx] == PoisonMaskElem)
continue;
- int SrcRegIdx = Mask[Idx] / SzSrc;
+ int MaskIdx = Mask[Idx] % Sz;
+ int SrcRegIdx = MaskIdx / SzSrc + (Mask[Idx] >= Sz ? NumOfSrcRegs : 0);
// Add a cost of PermuteTwoSrc for each new source register permute,
// if we have more than one source registers.
if (RegMasks[SrcRegIdx].empty())
RegMasks[SrcRegIdx].assign(SzDest, PoisonMaskElem);
- RegMasks[SrcRegIdx][K] = Mask[Idx] % SzSrc;
+ RegMasks[SrcRegIdx][K] = MaskIdx % SzSrc;
}
}
// Process split mask.
- for (unsigned I = 0; I < NumOfUsedRegs; ++I) {
+ for (unsigned I : seq<unsigned>(NumOfUsedRegs)) {
auto &Dest = Res[I];
int NumSrcRegs =
count_if(Dest, [](ArrayRef<int> Mask) { return !Mask.empty(); });
@@ -567,7 +568,7 @@ void llvm::processShuffleMasks(
int FirstIdx = -1;
SecondIdx = -1;
MutableArrayRef<int> FirstMask, SecondMask;
- for (unsigned I = 0; I < NumOfDestRegs; ++I) {
+ for (unsigned I : seq<unsigned>(2 * NumOfSrcRegs)) {
SmallVectorImpl<int> &RegMask = Dest[I];
if (RegMask.empty())
continue;