path: root/llvm/lib/Analysis/VectorUtils.cpp
author     Sanjay Patel <spatel@rotateright.com>  2020-04-12 09:17:59 -0400
committer  Sanjay Patel <spatel@rotateright.com>  2020-04-12 10:14:19 -0400
commit     c23cbefd9d73e8551b0c8b6cc3f14bb6b067ca92 (patch)
tree       f694be2ac0142c58ad40cf8d257726d8a250f452 /llvm/lib/Analysis/VectorUtils.cpp
parent     101a69d71b93f901561621508ed36b187e594d91 (diff)
[VectorUtils] add IR-level analysis for widening of shuffle mask
This is similar to the recent move/addition of "scaleShuffleMask" (D76508), but there are a couple of differences:

1. The existing x86 helper (canWidenShuffleElements) always tries to divide-by-2, so it gets called iteratively and wouldn't handle the general case of non-pow-2 length.
2. The existing x86 code handles "SM_SentinelZero" - we don't have that in IR, but this code should be safe to use with that or other special (negative) values.

The motivation is to enable shuffle folds in instcombine/vector-combine that are similar to D76844 and D76727, but in the reverse-bitcast direction. Those patterns are visible in the tests for D40633.

Differential Revision: https://reviews.llvm.org/D77881
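For reference, the following sketch (not part of this patch) illustrates what the new helper computes. It assumes the declaration added to llvm/Analysis/VectorUtils.h in this commit; the function name widenMaskExamples and the mask values are made up for illustration.

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/VectorUtils.h"
using namespace llvm;

void widenMaskExamples() {
  SmallVector<int, 8> Wide;

  // Aligned runs of consecutive elements widen cleanly:
  // <0,1,2,3,4,5> with Scale = 2 becomes <0,1,2>.
  bool Ok = widenShuffleMaskElts(2, {0, 1, 2, 3, 4, 5}, Wide);  // true

  // A slice of identical negative (undef/sentinel) values collapses to one
  // negative element: <0,1,-1,-1,6,7> with Scale = 2 becomes <0,-1,3>.
  Ok = widenShuffleMaskElts(2, {0, 1, -1, -1, 6, 7}, Wide);     // true

  // A slice that does not start at a multiple of Scale (or whose elements are
  // not consecutive) cannot be expressed with wider elements.
  Ok = widenShuffleMaskElts(2, {1, 2, 4, 5, 6, 7}, Wide);       // false
  (void)Ok;
}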
Diffstat (limited to 'llvm/lib/Analysis/VectorUtils.cpp')
-rw-r--r--  llvm/lib/Analysis/VectorUtils.cpp  51
1 file changed, 51 insertions, 0 deletions
diff --git a/llvm/lib/Analysis/VectorUtils.cpp b/llvm/lib/Analysis/VectorUtils.cpp
index a5fa3ec..31b600b 100644
--- a/llvm/lib/Analysis/VectorUtils.cpp
+++ b/llvm/lib/Analysis/VectorUtils.cpp
@@ -420,6 +420,57 @@ void llvm::narrowShuffleMaskElts(int Scale, ArrayRef<int> Mask,
   }
 }

+bool llvm::widenShuffleMaskElts(int Scale, ArrayRef<int> Mask,
+                                SmallVectorImpl<int> &ScaledMask) {
+  assert(Scale > 0 && "Unexpected scaling factor");
+
+  // Fast-path: if no scaling, then it is just a copy.
+  if (Scale == 1) {
+    ScaledMask.assign(Mask.begin(), Mask.end());
+    return true;
+  }
+
+  // We must map the original elements down evenly to a type with less elements.
+  int NumElts = Mask.size();
+  if (NumElts % Scale != 0)
+    return false;
+
+  ScaledMask.clear();
+  ScaledMask.reserve(NumElts / Scale);
+
+  // Step through the input mask by splitting into Scale-sized slices.
+  do {
+    ArrayRef<int> MaskSlice = Mask.take_front(Scale);
+    assert((int)MaskSlice.size() == Scale && "Expected Scale-sized slice.");
+
+    // The first element of the slice determines how we evaluate this slice.
+    int SliceFront = MaskSlice.front();
+    if (SliceFront < 0) {
+      // Negative values (undef or other "sentinel" values) must be equal across
+      // the entire slice.
+      if (!is_splat(MaskSlice))
+        return false;
+      ScaledMask.push_back(SliceFront);
+    } else {
+      // A positive mask element must be cleanly divisible.
+      if (SliceFront % Scale != 0)
+        return false;
+      // Elements of the slice must be consecutive.
+      for (int i = 1; i < Scale; ++i)
+        if (MaskSlice[i] != SliceFront + i)
+          return false;
+      ScaledMask.push_back(SliceFront / Scale);
+    }
+    Mask = Mask.drop_front(Scale);
+  } while (!Mask.empty());
+
+  assert((int)ScaledMask.size() * Scale == NumElts && "Unexpected scaled mask");
+
+  // All elements of the original mask can be scaled down to map to the elements
+  // of a mask with wider elements.
+  return true;
+}
+
 MapVector<Instruction *, uint64_t>
 llvm::computeMinimumValueSizes(ArrayRef<BasicBlock *> Blocks, DemandedBits &DB,
                                const TargetTransformInfo *TTI) {
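As the commit message notes, the intended consumers are instcombine/vector-combine folds that move a shuffle across a bitcast to a wider element type. A minimal caller sketch follows; it is not from this commit, and the helper name canShuffleOnWiderType and its element-count parameters are hypothetical.

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/VectorUtils.h"
using namespace llvm;

// Decide whether a shuffle mask defined on NarrowElts-element vectors can be
// rewritten on WideElts-element vectors (e.g. when the shuffle sits between
// bitcasts). On success, WideMask holds the equivalent wider-element mask.
static bool canShuffleOnWiderType(ArrayRef<int> OrigMask, int NarrowElts,
                                  int WideElts,
                                  SmallVectorImpl<int> &WideMask) {
  if (WideElts <= 0 || NarrowElts % WideElts != 0)
    return false;
  // e.g. going from <8 x i16> to <2 x i64> gives Scale = 4.
  int Scale = NarrowElts / WideElts;
  return widenShuffleMaskElts(Scale, OrigMask, WideMask);
}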