[SystemZ] Fix Operand Retrieval for Vector Reduction Intrinsic in `shouldExpandReduction` (#88874)

Previously, `SystemZTTIImpl::shouldExpandReduction` unconditionally cast the type of the intrinsic's first operand to `FixedVectorType`. This causes a `cast` error when handling vector reduction intrinsics whose first operand is not the vector to reduce, such as `llvm.vector.reduce.fadd` and `llvm.vector.reduce.fmul`. This commit fixes the problem by moving the cast into the case statement that handles the specific intrinsic, where the position of the vector operand is known.
author: Dominik Steenken <dost@de.ibm.com> 2024-04-19 14:18:05 +0200
committer: GitHub <noreply@github.com> 2024-04-19 14:18:05 +0200
commit: 5af9701985c6abba328dbedbd2d9c602dc46c4b0 (patch)
tree: 1d1ccc1fab9eed9f47548ab424828c07e0ed15a5
parent: e7c042f12fd6f3bbbbe9aeb37854d499aada8457 (diff)
download: llvm-5af9701985c6abba328dbedbd2d9c602dc46c4b0.zip
llvm-5af9701985c6abba328dbedbd2d9c602dc46c4b0.tar.gz
llvm-5af9701985c6abba328dbedbd2d9c602dc46c4b0.tar.bz2
1 files changed, 11 insertions, 14 deletions
diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
index 4c9e78c..3cd1e05 100644
--- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
@@ -18,6 +18,7 @@
 #include "llvm/CodeGen/BasicTTIImpl.h"
 #include "llvm/CodeGen/CostTable.h"
 #include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/Support/Debug.h"
@@ -1323,25 +1324,21 @@ SystemZTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
 }
 
 bool SystemZTTIImpl::shouldExpandReduction(const IntrinsicInst *II) const {
-  // Always expand on Subtargets without vector instructions
+  // Always expand on Subtargets without vector instructions.
   if (!ST->hasVector())
     return true;
 
-  // Always expand for operands that do not fill one vector reg
-  auto *Type = cast<FixedVectorType>(II->getOperand(0)->getType());
-  unsigned NumElts = Type->getNumElements();
-  unsigned ScalarSize = Type->getScalarSizeInBits();
-  unsigned MaxElts = SystemZ::VectorBits / ScalarSize;
-  if (NumElts < MaxElts)
-    return true;
-
-  // Otherwise
+  // Whether or not to expand is a per-intrinsic decision.
   switch (II->getIntrinsicID()) {
-  // Do not expand vector.reduce.add
-  case Intrinsic::vector_reduce_add:
-    // Except for i64, since the performance benefit is dubious there
-    return ScalarSize >= 64;
   default:
     return true;
+  // Do not expand vector.reduce.add...
+  case Intrinsic::vector_reduce_add:
+    auto *VType = cast<FixedVectorType>(II->getOperand(0)->getType());
+    // ...unless the scalar size is i64 or larger,
+    // or the operand vector is not full, since the
+    // performance benefit is dubious in those cases.
+    return VType->getScalarSizeInBits() >= 64 ||
+           VType->getPrimitiveSizeInBits() < SystemZ::VectorBits;
   }
 }
author	Dominik Steenken <dost@de.ibm.com>	2024-04-19 14:18:05 +0200
committer	GitHub <noreply@github.com>	2024-04-19 14:18:05 +0200
commit	5af9701985c6abba328dbedbd2d9c602dc46c4b0 (patch)
tree	1d1ccc1fab9eed9f47548ab424828c07e0ed15a5
parent	e7c042f12fd6f3bbbbe9aeb37854d499aada8457 (diff)
download	llvm-5af9701985c6abba328dbedbd2d9c602dc46c4b0.zip llvm-5af9701985c6abba328dbedbd2d9c602dc46c4b0.tar.gz llvm-5af9701985c6abba328dbedbd2d9c602dc46c4b0.tar.bz2