diff options
author | David Green <david.green@arm.com> | 2020-05-13 14:35:32 +0100 |
---|---|---|
committer | David Green <david.green@arm.com> | 2020-05-13 15:24:16 +0100 |
commit | fa15255d8af53126bbcb017f2fb6f9961e8574df (patch) | |
tree | 70f59ccbc5a55e948d0981a986f6c5515b09c3ad /llvm/lib/CodeGen/CodeGenPrepare.cpp | |
parent | a5d80818fa702876ced0e62bffcd24d3d9bf43b9 (diff) | |
download | llvm-fa15255d8af53126bbcb017f2fb6f9961e8574df.zip llvm-fa15255d8af53126bbcb017f2fb6f9961e8574df.tar.gz llvm-fa15255d8af53126bbcb017f2fb6f9961e8574df.tar.bz2 |
[ARM] Convert floating point splats to integer
Under MVE a vdup will always take a gpr register, not a floating point
value. During DAG combine we convert the types to a bitcast to an
integer in an attempt to fold the bitcast into other instructions. This
is OK, but only works inside the same basic block. To do the same trick
across a basic block boundary we need to convert the type in
codegenprepare, before the splat is sunk into the loop.
This adds a convertSplatType function to codegenprepare to do that,
putting bitcasts around the splat to force the type to an integer. There
is then some adjustment to the code in shouldSinkOperands to handle the
extra bitcasts.
Differential Revision: https://reviews.llvm.org/D78728
Diffstat (limited to 'llvm/lib/CodeGen/CodeGenPrepare.cpp')
-rw-r--r-- | llvm/lib/CodeGen/CodeGenPrepare.cpp | 56 |
1 file changed, 55 insertions, 1 deletion
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp index 881366c..60cc995 100644 --- a/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -392,6 +392,8 @@ class TypePromotionTransaction; bool optimizeLoadExt(LoadInst *Load); bool optimizeShiftInst(BinaryOperator *BO); bool optimizeSelectInst(SelectInst *SI); + bool sinkShuffleVectorToShift(ShuffleVectorInst *SVI); + bool convertSplatType(ShuffleVectorInst *SVI); bool optimizeShuffleVectorInst(ShuffleVectorInst *SVI); bool optimizeSwitchInst(SwitchInst *SI); bool optimizeExtractElementInst(Instruction *Inst); @@ -6419,7 +6421,7 @@ bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) { /// (e.g. x86 only introduced "vpsllvd" and friends with AVX2). In these cases /// it's often worth sinking a shufflevector splat down to its use so that /// codegen can spot all lanes are identical. -bool CodeGenPrepare::optimizeShuffleVectorInst(ShuffleVectorInst *SVI) { +bool CodeGenPrepare::sinkShuffleVectorToShift(ShuffleVectorInst *SVI) { BasicBlock *DefBB = SVI->getParent(); // Only do this xform if variable vector shifts are particularly expensive. @@ -6471,6 +6473,58 @@ bool CodeGenPrepare::optimizeShuffleVectorInst(ShuffleVectorInst *SVI) { return MadeChange; } +/// Some targets only accept certain types for splat inputs. For example a VDUP +/// in MVE takes a GPR (integer) register, and the instruction that incorporate +/// a VDUP (such as a VADD qd, qm, rm) also require a gpr register. 
+bool CodeGenPrepare::convertSplatType(ShuffleVectorInst *SVI) { + if (!match(SVI, + m_ShuffleVector(m_InsertElement(m_Undef(), m_Value(), m_ZeroInt()), + m_Undef(), m_ZeroMask()))) + return false; + Type *NewType = TLI->shouldConvertSplatType(SVI); + if (!NewType) + return false; + + VectorType *SVIVecType = cast<VectorType>(SVI->getType()); + Type *SVIType = SVIVecType->getScalarType(); + assert(!NewType->isVectorTy() && "Expected a scalar type!"); + assert(NewType->getScalarSizeInBits() == SVIType->getScalarSizeInBits() && + "Expected a type of the same size!"); + Type *NewVecType = VectorType::get(NewType, SVIVecType->getNumElements()); + + // Create a bitcast (shuffle (insert (bitcast(..)))) + IRBuilder<> Builder(SVI->getContext()); + Builder.SetInsertPoint(SVI); + Value *BC1 = Builder.CreateBitCast( + cast<Instruction>(SVI->getOperand(0))->getOperand(1), NewType); + Value *Insert = Builder.CreateInsertElement(UndefValue::get(NewVecType), BC1, + (uint64_t)0); + Value *Shuffle = Builder.CreateShuffleVector( + Insert, UndefValue::get(NewVecType), SVI->getShuffleMask()); + Value *BC2 = Builder.CreateBitCast(Shuffle, SVIVecType); + + SVI->replaceAllUsesWith(BC2); + RecursivelyDeleteTriviallyDeadInstructions(SVI); + + // Also hoist the bitcast up to its operand if it they are not in the same + // block. + if (auto *BCI = dyn_cast<Instruction>(BC1)) + if (auto *Op = dyn_cast<Instruction>(BCI->getOperand(0))) + if (BCI->getParent() != Op->getParent() && !isa<PHINode>(Op) && + !Op->isTerminator() && !Op->isEHPad()) + BCI->moveAfter(Op); + + return true; +} + +bool CodeGenPrepare::optimizeShuffleVectorInst(ShuffleVectorInst *SVI) { + if (sinkShuffleVectorToShift(SVI)) + return true; + if (convertSplatType(SVI)) + return true; + return false; +} + bool CodeGenPrepare::tryToSinkFreeOperands(Instruction *I) { // If the operands of I can be folded into a target instruction together with // I, duplicate and sink them. |