aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp2
-rw-r--r--llvm/test/Analysis/CostModel/ARM/mve-cmp.ll5
2 files changed, 6 insertions, 1 deletions
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
index 3c10246..cbfd2bc 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -1036,7 +1036,7 @@ InstructionCost ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
// split, we may need an expensive shuffle to get two in sync. This has the
// effect of making larger than legal compares (v8i32 for example)
// expensive.
- if (LT.second.getVectorNumElements() > 2) {
+ if (LT.second.isVector() && LT.second.getVectorNumElements() > 2) {
if (LT.first > 1)
return LT.first * BaseCost +
BaseT::getScalarizationOverhead(VecCondTy, true, false);
diff --git a/llvm/test/Analysis/CostModel/ARM/mve-cmp.ll b/llvm/test/Analysis/CostModel/ARM/mve-cmp.ll
index 17dd26c..bb517fa 100644
--- a/llvm/test/Analysis/CostModel/ARM/mve-cmp.ll
+++ b/llvm/test/Analysis/CostModel/ARM/mve-cmp.ll
@@ -22,6 +22,8 @@ define void @icmp() {
; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %v2i64 = icmp slt <2 x i64> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %v4i64 = icmp slt <4 x i64> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %v8i64 = icmp slt <8 x i64> undef, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %v2i128 = icmp slt <2 x i128> undef, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %v4i128 = icmp slt <4 x i128> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
%v2i8 = icmp slt <2 x i8> undef, undef
@@ -44,6 +46,9 @@ define void @icmp() {
%v4i64 = icmp slt <4 x i64> undef, undef
%v8i64 = icmp slt <8 x i64> undef, undef
+ %v2i128 = icmp slt <2 x i128> undef, undef
+ %v4i128 = icmp slt <4 x i128> undef, undef
+
ret void
}