author     Sanjay Patel <spatel@rotateright.com>    2020-04-12 09:17:59 -0400
committer  Sanjay Patel <spatel@rotateright.com>    2020-04-12 10:14:19 -0400
commit     c23cbefd9d73e8551b0c8b6cc3f14bb6b067ca92 (patch)
tree       f694be2ac0142c58ad40cf8d257726d8a250f452 /llvm/lib/Analysis/VectorUtils.cpp
parent     101a69d71b93f901561621508ed36b187e594d91 (diff)
[VectorUtils] add IR-level analysis for widening of shuffle mask
This is similar to the recent move/addition of "scaleShuffleMask" (D76508),
but there are a couple of differences:
1. The existing x86 helper (canWidenShuffleElements) always tries to
divide by 2, so it must be called iteratively and cannot handle the
general case of a non-power-of-2 scale factor.
2. The existing x86 code handles "SM_SentinelZero" - we don't have
that in IR, but this code should be safe to use with that or other
special (negative) values. Both differences are illustrated in the
sketch below.
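
As a concrete illustration of both points, here is a minimal sketch of
calling the new helper with a non-power-of-2 scale and with sentinel
values; the mask values and the driver function are illustrative only,
not part of this commit:

  #include "llvm/ADT/SmallVector.h"
  #include "llvm/Analysis/VectorUtils.h"

  using namespace llvm;

  void example() {
    SmallVector<int, 8> Wide;

    // Scale = 3 (not a power of 2): <0,1,2,6,7,8> widens to <0,2> in a
    // single call, with no iterative divide-by-2 as in the x86 helper.
    int Consecutive[] = {0, 1, 2, 6, 7, 8};
    bool Ok = widenShuffleMaskElts(3, Consecutive, Wide); // Ok; Wide == <0,2>

    // A slice of equal negative (sentinel/undef) values passes through:
    // <-1,-1,4,5> widens to <-1,2>.
    int Sentinels[] = {-1, -1, 4, 5};
    Ok = widenShuffleMaskElts(2, Sentinels, Wide); // Ok; Wide == <-1,2>

    // Elements within a slice that are not consecutive cannot be
    // expressed with wider elements, so the function returns false.
    int Split[] = {0, 2, 4, 6};
    Ok = widenShuffleMaskElts(2, Split, Wide); // !Ok
    (void)Ok;
  }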
The motivation is to enable shuffle folds in instcombine/vector-combine
that are similar to D76844 and D76727, but in the reverse-bitcast direction.
Those patterns are visible in the tests for D40633.
Differential Revision: https://reviews.llvm.org/D77881
Diffstat (limited to 'llvm/lib/Analysis/VectorUtils.cpp')
-rw-r--r--  llvm/lib/Analysis/VectorUtils.cpp | 51
1 file changed, 51 insertions(+), 0 deletions(-)
diff --git a/llvm/lib/Analysis/VectorUtils.cpp b/llvm/lib/Analysis/VectorUtils.cpp
index a5fa3ec..31b600b 100644
--- a/llvm/lib/Analysis/VectorUtils.cpp
+++ b/llvm/lib/Analysis/VectorUtils.cpp
@@ -420,6 +420,57 @@ void llvm::narrowShuffleMaskElts(int Scale, ArrayRef<int> Mask,
   }
 }

+bool llvm::widenShuffleMaskElts(int Scale, ArrayRef<int> Mask,
+                                SmallVectorImpl<int> &ScaledMask) {
+  assert(Scale > 0 && "Unexpected scaling factor");
+
+  // Fast-path: if no scaling, then it is just a copy.
+  if (Scale == 1) {
+    ScaledMask.assign(Mask.begin(), Mask.end());
+    return true;
+  }
+
+  // We must map the original elements down evenly to a type with less elements.
+  int NumElts = Mask.size();
+  if (NumElts % Scale != 0)
+    return false;
+
+  ScaledMask.clear();
+  ScaledMask.reserve(NumElts / Scale);
+
+  // Step through the input mask by splitting into Scale-sized slices.
+  do {
+    ArrayRef<int> MaskSlice = Mask.take_front(Scale);
+    assert((int)MaskSlice.size() == Scale && "Expected Scale-sized slice.");
+
+    // The first element of the slice determines how we evaluate this slice.
+    int SliceFront = MaskSlice.front();
+    if (SliceFront < 0) {
+      // Negative values (undef or other "sentinel" values) must be equal across
+      // the entire slice.
+      if (!is_splat(MaskSlice))
+        return false;
+      ScaledMask.push_back(SliceFront);
+    } else {
+      // A positive mask element must be cleanly divisible.
+      if (SliceFront % Scale != 0)
+        return false;
+      // Elements of the slice must be consecutive.
+      for (int i = 1; i < Scale; ++i)
+        if (MaskSlice[i] != SliceFront + i)
+          return false;
+      ScaledMask.push_back(SliceFront / Scale);
+    }
+    Mask = Mask.drop_front(Scale);
+  } while (!Mask.empty());
+
+  assert((int)ScaledMask.size() * Scale == NumElts && "Unexpected scaled mask");
+
+  // All elements of the original mask can be scaled down to map to the elements
+  // of a mask with wider elements.
+  return true;
+}
+
 MapVector<Instruction *, uint64_t>
 llvm::computeMinimumValueSizes(ArrayRef<BasicBlock *> Blocks, DemandedBits &DB,
                                const TargetTransformInfo *TTI) {
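
For context, here is a standalone usage sketch (not part of this commit)
pairing the new helper with the existing narrowShuffleMaskElts visible in
the surrounding context above. It assumes the declarations live in
llvm/Analysis/VectorUtils.h and that narrowShuffleMaskElts expands each
mask element into Scale consecutive narrow elements, which would make the
two functions inverses whenever widening succeeds:

  #include "llvm/ADT/SmallVector.h"
  #include "llvm/Analysis/VectorUtils.h"
  #include <cassert>

  using namespace llvm;

  int main() {
    SmallVector<int, 8> Narrowed, Widened;

    // Narrow the wide-element mask <1,0> by 2x: each element is assumed
    // to become two consecutive narrow elements, giving <2,3,0,1>.
    int WideMask[] = {1, 0};
    narrowShuffleMaskElts(2, WideMask, Narrowed);

    // Widening by the same scale recovers the original mask: every
    // 2-element slice is consecutive and starts at a multiple of Scale.
    bool Ok = widenShuffleMaskElts(2, Narrowed, Widened);
    assert(Ok && Widened[0] == 1 && Widened[1] == 0);
    return 0;
  }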