diff options
author | David Green <david.green@arm.com> | 2020-05-13 14:35:32 +0100 |
---|---|---|
committer | David Green <david.green@arm.com> | 2020-05-13 15:24:16 +0100 |
commit | fa15255d8af53126bbcb017f2fb6f9961e8574df (patch) | |
tree | 70f59ccbc5a55e948d0981a986f6c5515b09c3ad /llvm/lib/CodeGen/CodeGenPrepare.cpp | |
parent | a5d80818fa702876ced0e62bffcd24d3d9bf43b9 (diff) | |
download | llvm-fa15255d8af53126bbcb017f2fb6f9961e8574df.zip llvm-fa15255d8af53126bbcb017f2fb6f9961e8574df.tar.gz llvm-fa15255d8af53126bbcb017f2fb6f9961e8574df.tar.bz2 |
[ARM] Convert floating point splats to integer
Under MVE a vdup will always take a gpr register, not a floating point
value. During DAG combine we convert the types to a bitcast to an
integer in an attempt to fold the bitcast into other instructions. This
is OK, but only works inside the same basic block. To do the same trick
across a basic block boundary we need to convert the type in
codegenprepare, before the splat is sunk into the loop.
This adds a convertSplatType function to codegenprepare to do that,
putting bitcasts around the splat to force the type to an integer. There
is then some adjustment to the code in shouldSinkOperands to handle the
extra bitcasts.
Differential Revision: https://reviews.llvm.org/D78728
Diffstat (limited to 'llvm/lib/CodeGen/CodeGenPrepare.cpp')
-rw-r--r-- | llvm/lib/CodeGen/CodeGenPrepare.cpp | 56 |
1 file changed, 55 insertions, 1 deletion
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp index 881366c..60cc995 100644 --- a/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -392,6 +392,8 @@ class TypePromotionTransaction; bool optimizeLoadExt(LoadInst *Load); bool optimizeShiftInst(BinaryOperator *BO); bool optimizeSelectInst(SelectInst *SI); + bool sinkShuffleVectorToShift(ShuffleVectorInst *SVI); + bool convertSplatType(ShuffleVectorInst *SVI); bool optimizeShuffleVectorInst(ShuffleVectorInst *SVI); bool optimizeSwitchInst(SwitchInst *SI); bool optimizeExtractElementInst(Instruction *Inst); @@ -6419,7 +6421,7 @@ bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) { /// (e.g. x86 only introduced "vpsllvd" and friends with AVX2). In these cases /// it's often worth sinking a shufflevector splat down to its use so that /// codegen can spot all lanes are identical. -bool CodeGenPrepare::optimizeShuffleVectorInst(ShuffleVectorInst *SVI) { +bool CodeGenPrepare::sinkShuffleVectorToShift(ShuffleVectorInst *SVI) { BasicBlock *DefBB = SVI->getParent(); // Only do this xform if variable vector shifts are particularly expensive. @@ -6471,6 +6473,58 @@ bool CodeGenPrepare::optimizeShuffleVectorInst(ShuffleVectorInst *SVI) { return MadeChange; } +/// Some targets only accept certain types for splat inputs. For example a VDUP +/// in MVE takes a GPR (integer) register, and the instruction that incorporate +/// a VDUP (such as a VADD qd, qm, rm) also require a gpr register. 
+bool CodeGenPrepare::convertSplatType(ShuffleVectorInst *SVI) { + if (!match(SVI, + m_ShuffleVector(m_InsertElement(m_Undef(), m_Value(), m_ZeroInt()), + m_Undef(), m_ZeroMask()))) + return false; + Type *NewType = TLI->shouldConvertSplatType(SVI); + if (!NewType) + return false; + + VectorType *SVIVecType = cast<VectorType>(SVI->getType()); + Type *SVIType = SVIVecType->getScalarType(); + assert(!NewType->isVectorTy() && "Expected a scalar type!"); + assert(NewType->getScalarSizeInBits() == SVIType->getScalarSizeInBits() && + "Expected a type of the same size!"); + Type *NewVecType = VectorType::get(NewType, SVIVecType->getNumElements()); + + // Create a bitcast (shuffle (insert (bitcast(..)))) + IRBuilder<> Builder(SVI->getContext()); + Builder.SetInsertPoint(SVI); + Value *BC1 = Builder.CreateBitCast( + cast<Instruction>(SVI->getOperand(0))->getOperand(1), NewType); + Value *Insert = Builder.CreateInsertElement(UndefValue::get(NewVecType), BC1, + (uint64_t)0); + Value *Shuffle = Builder.CreateShuffleVector( + Insert, UndefValue::get(NewVecType), SVI->getShuffleMask()); + Value *BC2 = Builder.CreateBitCast(Shuffle, SVIVecType); + + SVI->replaceAllUsesWith(BC2); + RecursivelyDeleteTriviallyDeadInstructions(SVI); + + // Also hoist the bitcast up to its operand if it they are not in the same + // block. + if (auto *BCI = dyn_cast<Instruction>(BC1)) + if (auto *Op = dyn_cast<Instruction>(BCI->getOperand(0))) + if (BCI->getParent() != Op->getParent() && !isa<PHINode>(Op) && + !Op->isTerminator() && !Op->isEHPad()) + BCI->moveAfter(Op); + + return true; +} + +bool CodeGenPrepare::optimizeShuffleVectorInst(ShuffleVectorInst *SVI) { + if (sinkShuffleVectorToShift(SVI)) + return true; + if (convertSplatType(SVI)) + return true; + return false; +} + bool CodeGenPrepare::tryToSinkFreeOperands(Instruction *I) { // If the operands of I can be folded into a target instruction together with // I, duplicate and sink them. |