author     David Green <david.green@arm.com>    2020-07-21 08:24:37 +0100
committer  David Green <david.green@arm.com>    2020-07-21 11:39:31 +0100
commit     becaa6803ab532d15506829f0551a5fa49c39d7e (patch)
tree       2e66adebb095a0fd973d67209c6d1196931e6b46 /llvm/lib/Analysis/ConstantFolding.cpp
parent     e37b220442ec4f629a30e5ac229f2ae17580dca9 (diff)
[ARM] Constant fold VCTP intrinsics
We can sometimes get into a situation where the operand to a vctp
intrinsic becomes constant, such as after a loop is fully unrolled. This
patch adds the constant folding needed for these intrinsics, allowing them
to be simplified away and hopefully simplifying the remaining instructions.
Differential Revision: https://reviews.llvm.org/D84110
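As an informal illustration of the intended fold (not part of the commit itself; the IR below is a hypothetical sketch, and @example and %mask are made-up names), a vctp call whose element count has become a constant after full unrolling is expected to fold to a constant predicate vector:

declare <4 x i1> @llvm.arm.mve.vctp32(i32)

define <4 x i1> @example() {
  ; With a constant operand of 2, the fold should produce a v4i1 with the
  ; first two lanes true and the remaining lanes false:
  ;   <4 x i1> <i1 true, i1 true, i1 false, i1 false>
  %mask = call <4 x i1> @llvm.arm.mve.vctp32(i32 2)
  ret <4 x i1> %mask
}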
Diffstat (limited to 'llvm/lib/Analysis/ConstantFolding.cpp')
-rw-r--r--  llvm/lib/Analysis/ConstantFolding.cpp | 35
1 file changed, 34 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp
index 6feffcb..7c3e35b 100644
--- a/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/llvm/lib/Analysis/ConstantFolding.cpp
@@ -41,6 +41,7 @@
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/IntrinsicsAMDGPU.h"
+#include "llvm/IR/IntrinsicsARM.h"
 #include "llvm/IR/IntrinsicsX86.h"
 #include "llvm/IR/Operator.h"
 #include "llvm/IR/Type.h"
@@ -1456,6 +1457,11 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
   case Intrinsic::experimental_vector_reduce_smax:
   case Intrinsic::experimental_vector_reduce_umin:
   case Intrinsic::experimental_vector_reduce_umax:
+  // Target intrinsics
+  case Intrinsic::arm_mve_vctp8:
+  case Intrinsic::arm_mve_vctp16:
+  case Intrinsic::arm_mve_vctp32:
+  case Intrinsic::arm_mve_vctp64:
     return true;

   // Floating point operations cannot be folded in strictfp functions in
@@ -2719,7 +2725,8 @@ static Constant *ConstantFoldVectorCall(StringRef Name,
   SmallVector<Constant *, 4> Lane(Operands.size());
   Type *Ty = FVTy->getElementType();

-  if (IntrinsicID == Intrinsic::masked_load) {
+  switch (IntrinsicID) {
+  case Intrinsic::masked_load: {
     auto *SrcPtr = Operands[0];
     auto *Mask = Operands[2];
     auto *Passthru = Operands[3];
@@ -2757,6 +2764,32 @@ static Constant *ConstantFoldVectorCall(StringRef Name,
       return nullptr;
     return ConstantVector::get(NewElements);
   }
+  case Intrinsic::arm_mve_vctp8:
+  case Intrinsic::arm_mve_vctp16:
+  case Intrinsic::arm_mve_vctp32:
+  case Intrinsic::arm_mve_vctp64: {
+    if (auto *Op = dyn_cast<ConstantInt>(Operands[0])) {
+      unsigned Lanes = FVTy->getNumElements();
+      uint64_t Limit = Op->getZExtValue();
+      // vctp64 are currently modelled as returning a v4i1, not a v2i1. Make
+      // sure we get the limit right in that case and set all relevant lanes.
+      if (IntrinsicID == Intrinsic::arm_mve_vctp64)
+        Limit *= 2;
+
+      SmallVector<Constant *, 16> NCs;
+      for (unsigned i = 0; i < Lanes; i++) {
+        if (i < Limit)
+          NCs.push_back(ConstantInt::getTrue(Ty));
+        else
+          NCs.push_back(ConstantInt::getFalse(Ty));
+      }
+      return ConstantVector::get(NCs);
+    }
+    break;
+  }
+  default:
+    break;
+  }

   for (unsigned I = 0, E = FVTy->getNumElements(); I != E; ++I) {
     // Gather a column of constants.
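A note on the vctp64 handling above (an observation about the new code, not additional commit text): because the MVE vctp64 intrinsic is currently modelled as returning a v4i1 rather than a v2i1, the constant element count is doubled so that both i1 lanes backing each 64-bit element are set. Under that reading, a hypothetical call

  %mask = call <4 x i1> @llvm.arm.mve.vctp64(i32 1)

would be expected to fold to <4 x i1> <i1 true, i1 true, i1 false, i1 false>, i.e. two true lanes for the single active 64-bit element.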