author     David Green <david.green@arm.com>    2020-07-21 08:24:37 +0100
committer  David Green <david.green@arm.com>    2020-07-21 11:39:31 +0100
commit     becaa6803ab532d15506829f0551a5fa49c39d7e (patch)
tree       2e66adebb095a0fd973d67209c6d1196931e6b46 /llvm/lib/Analysis/ConstantFolding.cpp
parent     e37b220442ec4f629a30e5ac229f2ae17580dca9 (diff)
[ARM] Constant fold VCTP intrinsics
We can sometimes get into a situation where the operand to a vctp
intrinsic becomes constant, such as after a loop is fully unrolled. This
patch adds the constant folding needed for these intrinsics, allowing them
to be simplified away and hopefully simplifying the remaining instructions.
Differential Revision: https://reviews.llvm.org/D84110
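As an informal illustration of the intended fold (not part of the commit itself; the IR below is a hypothetical sketch, and @example and %mask are made-up names), a vctp call whose element count has become a constant after full unrolling is expected to fold to a constant predicate vector:

declare <4 x i1> @llvm.arm.mve.vctp32(i32)

define <4 x i1> @example() {
  ; With a constant operand of 2, the fold should produce a v4i1 with the
  ; first two lanes true and the remaining lanes false:
  ;   <4 x i1> <i1 true, i1 true, i1 false, i1 false>
  %mask = call <4 x i1> @llvm.arm.mve.vctp32(i32 2)
  ret <4 x i1> %mask
}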
Diffstat (limited to 'llvm/lib/Analysis/ConstantFolding.cpp')
-rw-r--r--  llvm/lib/Analysis/ConstantFolding.cpp | 35
1 file changed, 34 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp
index 6feffcb..7c3e35b 100644
--- a/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/llvm/lib/Analysis/ConstantFolding.cpp
@@ -41,6 +41,7 @@
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/IntrinsicsAMDGPU.h"
+#include "llvm/IR/IntrinsicsARM.h"
 #include "llvm/IR/IntrinsicsX86.h"
 #include "llvm/IR/Operator.h"
 #include "llvm/IR/Type.h"
@@ -1456,6 +1457,11 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
   case Intrinsic::experimental_vector_reduce_smax:
   case Intrinsic::experimental_vector_reduce_umin:
   case Intrinsic::experimental_vector_reduce_umax:
+  // Target intrinsics
+  case Intrinsic::arm_mve_vctp8:
+  case Intrinsic::arm_mve_vctp16:
+  case Intrinsic::arm_mve_vctp32:
+  case Intrinsic::arm_mve_vctp64:
     return true;

   // Floating point operations cannot be folded in strictfp functions in
@@ -2719,7 +2725,8 @@ static Constant *ConstantFoldVectorCall(StringRef Name,
   SmallVector<Constant *, 4> Lane(Operands.size());
   Type *Ty = FVTy->getElementType();

-  if (IntrinsicID == Intrinsic::masked_load) {
+  switch (IntrinsicID) {
+  case Intrinsic::masked_load: {
     auto *SrcPtr = Operands[0];
     auto *Mask = Operands[2];
     auto *Passthru = Operands[3];
@@ -2757,6 +2764,32 @@ static Constant *ConstantFoldVectorCall(StringRef Name,
       return nullptr;
     return ConstantVector::get(NewElements);
   }
+  case Intrinsic::arm_mve_vctp8:
+  case Intrinsic::arm_mve_vctp16:
+  case Intrinsic::arm_mve_vctp32:
+  case Intrinsic::arm_mve_vctp64: {
+    if (auto *Op = dyn_cast<ConstantInt>(Operands[0])) {
+      unsigned Lanes = FVTy->getNumElements();
+      uint64_t Limit = Op->getZExtValue();
+      // vctp64 are currently modelled as returning a v4i1, not a v2i1. Make
+      // sure we get the limit right in that case and set all relevant lanes.
+      if (IntrinsicID == Intrinsic::arm_mve_vctp64)
+        Limit *= 2;
+
+      SmallVector<Constant *, 16> NCs;
+      for (unsigned i = 0; i < Lanes; i++) {
+        if (i < Limit)
+          NCs.push_back(ConstantInt::getTrue(Ty));
+        else
+          NCs.push_back(ConstantInt::getFalse(Ty));
+      }
+      return ConstantVector::get(NCs);
+    }
+    break;
+  }
+  default:
+    break;
+  }

   for (unsigned I = 0, E = FVTy->getNumElements(); I != E; ++I) {
     // Gather a column of constants.
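A note on the vctp64 handling above (an observation about the new code, not additional commit text): because the MVE vctp64 intrinsic is currently modelled as returning a v4i1 rather than a v2i1, the constant element count is doubled so that both i1 lanes backing each 64-bit element are set. Under that reading, a hypothetical call

  %mask = call <4 x i1> @llvm.arm.mve.vctp64(i32 1)

would be expected to fold to <4 x i1> <i1 true, i1 true, i1 false, i1 false>, i.e. two true lanes for the single active 64-bit element.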