diff options
-rw-r--r-- | llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp | 2 | ||||
-rw-r--r-- | llvm/test/Analysis/CostModel/ARM/mve-cmp.ll | 5 |
2 files changed, 6 insertions, 1 deletion
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp index 3c10246..cbfd2bc 100644 --- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -1036,7 +1036,7 @@ InstructionCost ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, // split, we may need an expensive shuffle to get two in sync. This has the // effect of making larger than legal compares (v8i32 for example) // expensive. - if (LT.second.getVectorNumElements() > 2) { + if (LT.second.isVector() && LT.second.getVectorNumElements() > 2) { if (LT.first > 1) return LT.first * BaseCost + BaseT::getScalarizationOverhead(VecCondTy, true, false); diff --git a/llvm/test/Analysis/CostModel/ARM/mve-cmp.ll b/llvm/test/Analysis/CostModel/ARM/mve-cmp.ll index 17dd26c..bb517fa 100644 --- a/llvm/test/Analysis/CostModel/ARM/mve-cmp.ll +++ b/llvm/test/Analysis/CostModel/ARM/mve-cmp.ll @@ -22,6 +22,8 @@ define void @icmp() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %v2i64 = icmp slt <2 x i64> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %v4i64 = icmp slt <4 x i64> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %v8i64 = icmp slt <8 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %v2i128 = icmp slt <2 x i128> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %v4i128 = icmp slt <4 x i128> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %v2i8 = icmp slt <2 x i8> undef, undef @@ -44,6 +46,9 @@ define void @icmp() { %v4i64 = icmp slt <4 x i64> undef, undef %v8i64 = icmp slt <8 x i64> undef, undef + %v2i128 = icmp slt <2 x i128> undef, undef + %v4i128 = icmp slt <4 x i128> undef, undef + ret void } |